RED-7375:

* using ViewerDocumentService to draw TableExtractorResponse into documents
This commit is contained in:
yhampe 2023-11-28 15:14:04 +01:00
parent 207d9dec97
commit f4e93ef03b
15 changed files with 251 additions and 10 deletions

View File

@ -20,6 +20,9 @@ public record LayoutParsingRequest(
@NonNull String originFileStorageId,//
@Schema(description = "Optional Path to the table extraction file.")//
Optional<String> tablesFileStorageId,//
@Schema(description= "Optional Path to the the table parsing service file")
Optional<String> tableExtractorFileId,
@Schema(description = "Optional Path to the image classification file.")//
Optional<String> imagesFileStorageId,//

View File

@ -32,6 +32,7 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.C
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
@ -87,12 +88,17 @@ public class LayoutParsingPipeline {
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
}
TableExtractorResponse tableExtractorResponse = new TableExtractorResponse();
if(layoutParsingRequest.tableExtractorFileId().isPresent()) {
tableExtractorResponse = layoutParsingStorageService.getExtractedTableFile(layoutParsingRequest.tableExtractorFileId().get());
}
TableServiceResponse tableServiceResponse = new TableServiceResponse();
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
}
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse, tableExtractorResponse);
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
int numberOfPages = originDocument.getNumberOfPages();
@ -102,6 +108,7 @@ public class LayoutParsingPipeline {
try (var out = new ByteArrayOutputStream()) {
viewerDocumentService.createViewerDocument(originDocument, documentGraph, out, false);
viewerDocumentService.drawExtractedTables(originDocument,documentGraph,out,tableExtractorResponse.getExtractedTableData());
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, out);
}
@ -157,10 +164,11 @@ public class LayoutParsingPipeline {
public ClassificationDocument parseLayout(LayoutParsingType layoutParsingType,
PDDocument originDocument,
ImageServiceResponse imageServiceResponse,
TableServiceResponse tableServiceResponse) {
TableServiceResponse tableServiceResponse, TableExtractorResponse tableExtractorResponse) {
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
// The table cells still have to be read in here
ClassificationDocument classificationDocument = new ClassificationDocument();
List<ClassificationPage> classificationPages = new ArrayList<>();

View File

@ -23,6 +23,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Si
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorData;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -62,6 +64,14 @@ public class LayoutParsingStorageService {
}
}
/**
 * Reads the object returned by {@code getObject(storageId)} and deserializes it
 * into a {@link TableExtractorResponse} using the service's {@code objectMapper}.
 *
 * @param storageId id passed to {@code getObject} to locate the stored file
 * @return the deserialized table extractor response
 * @throws IOException if the stream cannot be opened, read or parsed
 */
public TableExtractorResponse getExtractedTableFile(String storageId) throws IOException {

    // try-with-resources already closes the stream on every exit path,
    // so no explicit close() call is needed inside the block.
    try (InputStream inputStream = getObject(storageId)) {
        return objectMapper.readValue(inputStream, TableExtractorResponse.class);
    }
}
public TableServiceResponse getTablesFile(String storageId) throws IOException {

View File

@ -0,0 +1,17 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One table entry of a page result (element type of {@code TableExtractorData.tables}).
 * Getters, setters, builder and both constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ExtractedTable {
// NOTE(review): presumably true when the table was detected as rotated on the page - confirm with the extractor service.
private boolean rotated;
// Label, score and bounding box describing the table itself.
private ExtractedTableData extractedTableValue;
}

View File

@ -0,0 +1,19 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A single labelled detection: used both for a table itself
 * ({@code ExtractedTable.extractedTableValue}) and for the entries of
 * {@code TableExtractorData.objects}.
 * Getters, setters, builder and both constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ExtractedTableData {
// NOTE(review): presumably the class name assigned by the detector (e.g. table / row / column) - confirm.
private String label;
// NOTE(review): presumably the detection confidence - confirm value range with the extractor service.
private float score;
// Four values; ViewerDocumentService reads them as [x0, y0, x1, y1] (indices 0..3).
// NOTE(review): coordinate system and units are not visible here - confirm.
private List<Float> boundingBox;
}

View File

@ -0,0 +1,25 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Per-page result inside a {@link TableExtractorResponse}.
 * Getters, setters, builder and both constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableExtractorData {
// Page the detections belong to; ViewerDocumentService passes it unchanged to the drawing utility.
// NOTE(review): ViewerDocumentTest adds 1 to the raw "page_number" value - confirm whether this field is 0- or 1-based.
private int pageNumber;
private int pageRotation;
// NOTE(review): "Heigth" is misspelled, but the name determines the generated accessors
// (getImageHeigth/setImageHeigth) and builder method, so rename only together with all users.
private int imageHeigth;
private int imageWidth;
private float pdfHeight;
private float pdfWidth;
private int dpi;
// Tables found on the page; null unless set (no default initializer).
private List<ExtractedTable> tables;
// Further detections on the page; null unless set (no default initializer).
// NOTE(review): the sample JSON parsed in ViewerDocumentTest nests "objects" inside each table entry - confirm the mapping.
private List<ExtractedTableData> objects;
}

View File

@ -0,0 +1,22 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Response file of the table extractor as deserialized by
 * {@code LayoutParsingStorageService.getExtractedTableFile}.
 * Getters, setters, builder and both constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableExtractorResponse { private String dossierId;
private String fileId;
private String targetFileExtension;
private String responseFileExtension;
// NOTE(review): name deviates from lowerCamelCase, presumably to mirror a key of the incoming message - confirm before renaming.
private String X_TENANT_ID;
// One entry per page; stays null on an instance created with the no-args constructor,
// so callers must be prepared for null.
private List<TableExtractorData> extractedTableData;
}

View File

@ -1,12 +1,22 @@
package com.knecon.fforesight.service.layoutparser.processor.services.visualization;
import java.awt.Color;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.print.Doc;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
@ -23,13 +33,21 @@ import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
import org.apache.pdfbox.util.Matrix;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine;
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle;
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.FilledRectangle;
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid;
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText;
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.VisualizationsOnPage;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.ExtractedTable;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.ExtractedTableData;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorData;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -47,6 +65,45 @@ public class ViewerDocumentService {
private final LayoutGridService layoutGridService;
/**
 * Draws the bounding boxes delivered by the table extractor into {@code pdDocument}
 * and then saves the document to {@code outputStream}.
 * <p>
 * For every page entry the table bounding boxes are stroked in pink and the object
 * bounding boxes in cyan (stroke width 1).
 * <p>
 * A {@code null} list (which is what a {@code TableExtractorResponse} created with the
 * no-args constructor returns) and {@code null} table/object lists are treated as empty,
 * so the document is still saved instead of failing with a NullPointerException.
 * <p>
 * NOTE(review): the pipeline calls this right after {@code createViewerDocument} with the
 * same stream; if that method also writes the document to the stream, the stream will
 * receive the document twice - confirm.
 *
 * @param pdDocument         document to draw into and to save
 * @param document           currently unused; kept for signature compatibility
 * @param outputStream       target the document is saved to
 * @param tableExtractorData per-page extractor results, may be {@code null}
 */
@SneakyThrows
public void drawExtractedTables(PDDocument pdDocument, Document document, OutputStream outputStream, List<TableExtractorData> tableExtractorData) {

    List<TableExtractorData> pages = tableExtractorData == null ? List.of() : tableExtractorData;

    for (TableExtractorData page : pages) {
        int pageNumber = page.getPageNumber();

        List<Rectangle2D> tableRectangles = new ArrayList<>();
        if (page.getTables() != null) {
            for (ExtractedTable table : page.getTables()) {
                tableRectangles.add(boundingBoxToRectangle(table.getExtractedTableValue().getBoundingBox()));
            }
        }

        List<Rectangle2D> objectRectangles = new ArrayList<>();
        if (page.getObjects() != null) {
            for (ExtractedTableData object : page.getObjects()) {
                objectRectangles.add(boundingBoxToRectangle(object.getBoundingBox()));
            }
        }

        PdfVisualisationUtility.drawRectangle2DList(pdDocument,
                pageNumber,
                tableRectangles,
                PdfVisualisationUtility.Options.builder().strokeColor(Color.PINK).strokeWidth(1).stroke(true).build());
        PdfVisualisationUtility.drawRectangle2DList(pdDocument,
                pageNumber,
                objectRectangles,
                PdfVisualisationUtility.Options.builder().strokeColor(Color.CYAN).strokeWidth(1).stroke(true).build());
    }

    pdDocument.save(outputStream);
}


/**
 * Converts an extractor bounding box, read as [x0, y0, x1, y1], into a rectangle.
 */
private static Rectangle2D boundingBoxToRectangle(List<Float> boundingBox) {

    float x0 = boundingBox.get(0);
    float y0 = boundingBox.get(1);
    float x1 = boundingBox.get(2);
    float y1 = boundingBox.get(3);
    // Argument order (y0, x0, width, height) is kept exactly as before;
    // presumably Rectangle expects (top, left, width, height) - confirm.
    return new Rectangle(y0, x0, x1 - x0, y1 - y0);
}
@SneakyThrows
public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream, boolean layerVisibilityDefaultValue) {

View File

@ -26,6 +26,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
@ -50,7 +51,7 @@ public class BdrJsonBuildTest extends AbstractTest {
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS,
pdDocument,
new ImageServiceResponse(),
new TableServiceResponse()));
new TableServiceResponse(), new TableExtractorResponse()));
}
}

View File

@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.tenantcommons.TenantsClient;
@ -98,7 +99,7 @@ public class HeadlinesGoldStandardIntegrationTest {
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
Loader.loadPDF(pdfFileResource.getFile()),
new ImageServiceResponse(),
new TableServiceResponse()));
new TableServiceResponse(), new TableExtractorResponse()));
var foundHeadlines = documentGraph.streamAllSubNodes()
.map(SemanticNode::getHeadline)

View File

@ -16,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Do
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
@ -58,7 +59,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
Loader.loadPDF(filename.toFile()),
new ImageServiceResponse(),
new TableServiceResponse()));
new TableServiceResponse(), new TableExtractorResponse()));
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
ObjectMapper mapper = ObjectMapperFactory.create();

View File

@ -1,27 +1,50 @@
package com.knecon.fforesight.service.layoutparser.server.graph;
import static com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw.drawRectangle2DList;
import java.awt.Color;
import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation;
import com.knecon.fforesight.service.layoutparser.processor.model.LineInformation;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.GapDetectionService;
import com.knecon.fforesight.service.layoutparser.processor.services.GapsAcrossLinesService;
import com.knecon.fforesight.service.layoutparser.processor.services.LineDetectionService;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
@ -29,7 +52,9 @@ import com.knecon.fforesight.service.layoutparser.processor.services.mapper.Docu
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility;
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
import lombok.SneakyThrows;
@ -55,12 +80,61 @@ public class ViewerDocumentTest extends BuildDocumentTest {
}
}
/**
 * Manual visual check: reads a table extractor result file, draws the bounding boxes of
 * all tables with a score of at least 0.99 (plus the boxes of their "objects" entries)
 * in green into the PDF and writes the result to a temp file.
 * <p>
 * Disabled because both input files and the output location are absolute paths on a
 * single developer machine, so the test cannot pass anywhere else. Remove the
 * annotation locally (and adjust the paths) to run it.
 */
@Test
@Disabled("Depends on input files that only exist on a developer machine")
@SneakyThrows
public void testLayoutParsingServiceResults() {

    String tableSourceFileName ="C:\\Users\\YannikHampe\\Downloads\\3875a78f1db6ff94b05e38446e65ba9a.EXTRACTED_TABLES.json\\3875a78f1db6ff94b05e38446e65ba9a.EXTRACTED_TABLES.json";
    Path pdfFileResource = Path.of("C:\\Users\\YannikHampe\\Downloads\\2009-1048395_50pages_tables.pdf");
    String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/rectangles."+pdfFileResource.getFileName();

    ObjectMapper objectMapper = new ObjectMapper();

    // try-with-resources makes sure the PDF is closed even if parsing or drawing fails.
    try (PDDocument pdDocument = Loader.loadPDF(pdfFileResource.toFile())) {
        // Let Jackson read the file directly so the encoding is detected by the parser
        // instead of depending on the platform default charset.
        JsonNode dataNode = objectMapper.readTree(new File(tableSourceFileName)).get("data");

        for (JsonNode pageNode : dataNode) {
            List<Rectangle2D> rectangles = new ArrayList<>();
            int pageNumber = pageNode.get("page_number").asInt() + 1;

            for (JsonNode entry : pageNode.get("tables")) {
                JsonNode table = entry.get("table");
                // Skip low-confidence tables together with their rows and columns.
                if (table.get("score").floatValue() < 0.99) {
                    continue;
                }
                // table bounding box
                rectangles.add(bboxToRectangle(table.get("bbox")));
                // columns and rows
                for (JsonNode rowOrColumn : entry.get("objects")) {
                    rectangles.add(bboxToRectangle(rowOrColumn.get("bbox")));
                }
            }

            PdfVisualisationUtility.drawRectangle2DList(pdDocument,
                    pageNumber,
                    rectangles,
                    PdfVisualisationUtility.Options.builder().strokeColor(Color.GREEN).strokeWidth(2).stroke(true).build());
        }

        try (var out = new FileOutputStream(tmpFileName)) {
            pdDocument.save(out);
        }
    }
}


/**
 * Converts a JSON "bbox" array, read as [x0, y0, x1, y1], into a rectangle.
 */
private static Rectangle2D bboxToRectangle(JsonNode bbox) {

    float x0 = bbox.get(0).floatValue();
    float y0 = bbox.get(1).floatValue();
    float x1 = bbox.get(2).floatValue();
    float y1 = bbox.get(3).floatValue();
    // Argument order (y0, x0, width, height) is kept exactly as before.
    return new Rectangle(y0, x0, x1 - x0, y1 - y0);
}
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
originDocument,
new ImageServiceResponse(),
new TableServiceResponse());
new TableServiceResponse(), new TableExtractorResponse());
redactManagerClassificationService.classifyDocument(classificationDocument);

View File

@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePag
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
@ -66,7 +67,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
originDocument,
new ImageServiceResponse(),
new TableServiceResponse());
new TableServiceResponse(), new TableExtractorResponse());
redactManagerClassificationService.classifyDocument(classificationDocument);

View File

@ -20,6 +20,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
@ -79,11 +80,11 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
Loader.loadPDF(filename.toFile()),
new ImageServiceResponse(),
new TableServiceResponse()));
new TableServiceResponse(), new TableExtractorResponse()));
Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
Loader.loadPDF(filename.toFile()),
new ImageServiceResponse(),
new TableServiceResponse()));
new TableServiceResponse(), new TableExtractorResponse()));
DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure(), filename.getFileName().toString())) {

View File

@ -11,6 +11,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
@ -28,7 +29,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
ClassPathResource fileResource = new ClassPathResource(filename);
prepareStorage(filename);
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream.readAllBytes())) {
return layoutParsingPipeline.parseLayout(layoutParsingType, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
return layoutParsingPipeline.parseLayout(layoutParsingType, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new TableExtractorResponse());
}
}