RED-7375:
* using ViewerDocumentService to draw TableExtractorResponse into documents
This commit is contained in:
parent
207d9dec97
commit
f4e93ef03b
@ -20,6 +20,9 @@ public record LayoutParsingRequest(
|
||||
@NonNull String originFileStorageId,//
|
||||
@Schema(description = "Optional Path to the table extraction file.")//
|
||||
Optional<String> tablesFileStorageId,//
|
||||
|
||||
@Schema(description= "Optional Path to the the table parsing service file")
|
||||
Optional<String> tableExtractorFileId,
|
||||
@Schema(description = "Optional Path to the image classification file.")//
|
||||
Optional<String> imagesFileStorageId,//
|
||||
|
||||
|
||||
@ -32,6 +32,7 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.C
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
|
||||
@ -87,12 +88,17 @@ public class LayoutParsingPipeline {
|
||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
|
||||
}
|
||||
|
||||
TableExtractorResponse tableExtractorResponse = new TableExtractorResponse();
|
||||
if(layoutParsingRequest.tableExtractorFileId().isPresent()) {
|
||||
tableExtractorResponse = layoutParsingStorageService.getExtractedTableFile(layoutParsingRequest.tableExtractorFileId().get());
|
||||
}
|
||||
|
||||
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
||||
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
|
||||
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
|
||||
}
|
||||
|
||||
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
|
||||
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse, tableExtractorResponse);
|
||||
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
|
||||
|
||||
int numberOfPages = originDocument.getNumberOfPages();
|
||||
@ -102,6 +108,7 @@ public class LayoutParsingPipeline {
|
||||
|
||||
try (var out = new ByteArrayOutputStream()) {
|
||||
viewerDocumentService.createViewerDocument(originDocument, documentGraph, out, false);
|
||||
viewerDocumentService.drawExtractedTables(originDocument,documentGraph,out,tableExtractorResponse.getExtractedTableData());
|
||||
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, out);
|
||||
}
|
||||
|
||||
@ -157,10 +164,11 @@ public class LayoutParsingPipeline {
|
||||
public ClassificationDocument parseLayout(LayoutParsingType layoutParsingType,
|
||||
PDDocument originDocument,
|
||||
ImageServiceResponse imageServiceResponse,
|
||||
TableServiceResponse tableServiceResponse) {
|
||||
TableServiceResponse tableServiceResponse, TableExtractorResponse tableExtractorResponse) {
|
||||
|
||||
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
|
||||
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
||||
// TODO: the table cells still have to be read in here
|
||||
|
||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||
List<ClassificationPage> classificationPages = new ArrayList<>();
|
||||
|
||||
@ -23,6 +23,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Si
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorData;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
@ -62,6 +64,14 @@ public class LayoutParsingStorageService {
|
||||
}
|
||||
}
|
||||
|
||||
public TableExtractorResponse getExtractedTableFile(String storageId) throws IOException {
|
||||
try (InputStream inputStream = getObject(storageId)) {
|
||||
TableExtractorResponse tableExtractorResponse = objectMapper.readValue(inputStream,TableExtractorResponse.class);
|
||||
inputStream.close();
|
||||
return tableExtractorResponse;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public TableServiceResponse getTablesFile(String storageId) throws IOException {
|
||||
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ExtractedTable {
|
||||
private boolean rotated;
|
||||
private ExtractedTableData extractedTableValue;
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,19 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ExtractedTableData {
|
||||
private String label;
|
||||
private float score;
|
||||
private List<Float> boundingBox;
|
||||
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class TableExtractorData {
|
||||
|
||||
private int pageNumber;
|
||||
private int pageRotation;
|
||||
private int imageHeigth;
|
||||
private int imageWidth;
|
||||
private float pdfHeight;
|
||||
private float pdfWidth;
|
||||
private int dpi;
|
||||
private List<ExtractedTable> tables;
|
||||
private List<ExtractedTableData> objects;
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class TableExtractorResponse { private String dossierId;
|
||||
private String fileId;
|
||||
private String targetFileExtension;
|
||||
private String responseFileExtension;
|
||||
private String X_TENANT_ID;
|
||||
private List<TableExtractorData> extractedTableData;
|
||||
|
||||
|
||||
}
|
||||
@ -1,12 +1,22 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.visualization;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.print.Doc;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.cos.COSDictionary;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
@ -23,13 +33,21 @@ import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.FilledRectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.VisualizationsOnPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.ExtractedTable;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.ExtractedTableData;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorData;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
@ -47,6 +65,45 @@ public class ViewerDocumentService {
|
||||
|
||||
private final LayoutGridService layoutGridService;
|
||||
|
||||
@SneakyThrows
|
||||
public void drawExtractedTables(PDDocument pdDocument, Document document, OutputStream outputStream, List<TableExtractorData> tableExtractorData) {
|
||||
|
||||
for (TableExtractorData tableExtractorDatum : tableExtractorData) {
|
||||
int pageNumber = tableExtractorDatum.getPageNumber();
|
||||
List<Rectangle2D> tableRectangles = new ArrayList<>();
|
||||
List<Rectangle2D> objectRectangles = new ArrayList<>();
|
||||
for (ExtractedTable table : tableExtractorDatum.getTables()) {
|
||||
List<Float> boundingBox = table.getExtractedTableValue().getBoundingBox();
|
||||
float x0 = boundingBox.get(0);
|
||||
float x1 = boundingBox.get(2);
|
||||
float y0 = boundingBox.get(1);
|
||||
float y1 = boundingBox.get(3);
|
||||
Rectangle2D tableRectangle = new Rectangle(y0, x0, x1 - x0, y1 - y0);
|
||||
tableRectangles.add(tableRectangle);
|
||||
}
|
||||
for (ExtractedTableData object : tableExtractorDatum.getObjects()) {
|
||||
List<Float> boundingBox = object.getBoundingBox();
|
||||
float x0 = boundingBox.get(0);
|
||||
float x1 = boundingBox.get(2);
|
||||
float y0 = boundingBox.get(1);
|
||||
float y1 = boundingBox.get(3);
|
||||
Rectangle2D objectRectangle = new Rectangle(y0, x0, x1 - x0, y1 - y0);
|
||||
objectRectangles.add(objectRectangle);
|
||||
}
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
tableRectangles,
|
||||
PdfVisualisationUtility.Options.builder().strokeColor(Color.PINK).strokeWidth(1).stroke(true).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
objectRectangles,
|
||||
PdfVisualisationUtility.Options.builder().strokeColor(Color.CYAN).strokeWidth(1).stroke(true).build());
|
||||
}
|
||||
pdDocument.save(outputStream);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream, boolean layerVisibilityDefaultValue) {
|
||||
|
||||
@ -26,6 +26,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
|
||||
@ -50,7 +51,7 @@ public class BdrJsonBuildTest extends AbstractTest {
|
||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS,
|
||||
pdDocument,
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse()));
|
||||
new TableServiceResponse(), new TableExtractorResponse()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||
@ -98,7 +99,7 @@ public class HeadlinesGoldStandardIntegrationTest {
|
||||
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
Loader.loadPDF(pdfFileResource.getFile()),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse()));
|
||||
new TableServiceResponse(), new TableExtractorResponse()));
|
||||
|
||||
var foundHeadlines = documentGraph.streamAllSubNodes()
|
||||
.map(SemanticNode::getHeadline)
|
||||
|
||||
@ -16,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Do
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
@ -58,7 +59,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
|
||||
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
Loader.loadPDF(filename.toFile()),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse()));
|
||||
new TableServiceResponse(), new TableExtractorResponse()));
|
||||
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
|
||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||
|
||||
@ -1,27 +1,50 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw.drawRectangle2DList;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.geom.RectangularShape;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.LineInformation;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.GapDetectionService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.GapsAcrossLinesService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.LineDetectionService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
@ -29,7 +52,9 @@ import com.knecon.fforesight.service.layoutparser.processor.services.mapper.Docu
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ -55,12 +80,61 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testLayoutParsingServiceResults() {
|
||||
String tableSourceFileName ="C:\\Users\\YannikHampe\\Downloads\\3875a78f1db6ff94b05e38446e65ba9a.EXTRACTED_TABLES.json\\3875a78f1db6ff94b05e38446e65ba9a.EXTRACTED_TABLES.json";
|
||||
Path pdfFileResource = Path.of("C:\\Users\\YannikHampe\\Downloads\\2009-1048395_50pages_tables.pdf");
|
||||
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/rectangles."+pdfFileResource.getFileName();
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
PDDocument pdDocument = Loader.loadPDF(pdfFileResource.toFile());
|
||||
JsonNode jsonNode = objectMapper.readTree(new String(Files.readAllBytes(new File(tableSourceFileName).toPath())));
|
||||
JsonNode dataNode = jsonNode.get("data");
|
||||
|
||||
dataNode.forEach(node -> {
|
||||
List<Rectangle2D> rectangles = new ArrayList<>();
|
||||
int pageNumber = node.get("page_number").asInt()+1;
|
||||
JsonNode tables = node.get("tables");
|
||||
tables.forEach(entry -> {
|
||||
JsonNode table = entry.get("table");
|
||||
//table bounding box
|
||||
if(Float.valueOf(String.valueOf(table.get("score"))) < 0.99) {
|
||||
return;
|
||||
}
|
||||
JsonNode tableBox = table.get("bbox");
|
||||
float x0 = Float.valueOf(tableBox.get(0).toString());
|
||||
float x1 = Float.valueOf(tableBox.get(2).toString());
|
||||
float y0 = Float.valueOf(tableBox.get(1).toString());
|
||||
float y1 = Float.valueOf(tableBox.get(3).toString());
|
||||
Rectangle2D rectangle2D = new Rectangle(y0, x0, x1 - x0, y1 - y0);
|
||||
rectangles.add(rectangle2D);
|
||||
//columns and rows
|
||||
JsonNode rowsAndColumns = entry.get("objects");
|
||||
rowsAndColumns.forEach(rowOrColumn -> {
|
||||
JsonNode bbox = rowOrColumn.get("bbox");
|
||||
float rx0 = Float.valueOf(bbox.get(0).toString());
|
||||
float rx1 = Float.valueOf(bbox.get(2).toString());
|
||||
float ry0 = Float.valueOf(bbox.get(1).toString());
|
||||
float ry1 = Float.valueOf(bbox.get(3).toString());
|
||||
Rectangle2D rowOrColumnRectangle = new Rectangle(ry0, rx0, rx1 - rx0, ry1 - ry0);
|
||||
rectangles.add(rowOrColumnRectangle);
|
||||
});
|
||||
});
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, rectangles, PdfVisualisationUtility.Options.builder().strokeColor(Color.GREEN).strokeWidth(2).stroke(true).build());
|
||||
|
||||
});
|
||||
try (var out = new FileOutputStream(tmpFileName)) {
|
||||
pdDocument.save(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
||||
|
||||
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
originDocument,
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse());
|
||||
new TableServiceResponse(), new TableExtractorResponse());
|
||||
|
||||
redactManagerClassificationService.classifyDocument(classificationDocument);
|
||||
|
||||
|
||||
@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePag
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
||||
@ -66,7 +67,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
originDocument,
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse());
|
||||
new TableServiceResponse(), new TableExtractorResponse());
|
||||
|
||||
redactManagerClassificationService.classifyDocument(classificationDocument);
|
||||
|
||||
|
||||
@ -20,6 +20,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
|
||||
@ -79,11 +80,11 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
|
||||
Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
Loader.loadPDF(filename.toFile()),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse()));
|
||||
new TableServiceResponse(), new TableExtractorResponse()));
|
||||
Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
Loader.loadPDF(filename.toFile()),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse()));
|
||||
new TableServiceResponse(), new TableExtractorResponse()));
|
||||
DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
|
||||
DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
|
||||
if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure(), filename.getFileName().toString())) {
|
||||
|
||||
@ -11,6 +11,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
|
||||
@ -28,7 +29,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
|
||||
ClassPathResource fileResource = new ClassPathResource(filename);
|
||||
prepareStorage(filename);
|
||||
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream.readAllBytes())) {
|
||||
return layoutParsingPipeline.parseLayout(layoutParsingType, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
|
||||
return layoutParsingPipeline.parseLayout(layoutParsingType, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new TableExtractorResponse());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user