RED-7375 table extractor prototype

Created a new branch because the old one broke.
This commit is contained in:
yhampe 2024-02-01 13:05:53 +01:00
parent ed376f1872
commit a7c63b8f5a
2 changed files with 12 additions and 10 deletions

View File

@@ -54,10 +54,10 @@ public class TableExtractorResponseAdapter {
parsedTableCells.add(tableCells);
t.getObjects().forEach(o -> {
TableExtractorCells objectCell = new TableExtractorCells();
objectCell.setX0(t.getTable().getBbox().get(0));
objectCell.setX1(t.getTable().getBbox().get(2));
objectCell.setY0(t.getTable().getBbox().get(1));
objectCell.setY1(t.getTable().getBbox().get(3));
objectCell.setX0(o.getBbox().get(0));
objectCell.setX1(o.getBbox().get(2));
objectCell.setY0(o.getBbox().get(1));
objectCell.setY1(o.getBbox().get(3));
objectCell.setWidth(objectCell.getX1()- objectCell.getX0());
objectCell.setHeight(objectCell.getY1()- objectCell.getY0());
objectCell.setLabel(o.getLabel());
@@ -66,6 +66,7 @@ public class TableExtractorResponseAdapter {
});
});
log.info("result from parsing: {}",parsedTableCells);
return parsedTableCells;
}

View File

@@ -64,8 +64,8 @@ public class ViewerDocumentService {
PDDocument pdDocument = openPDDocument(originFile);
LayoutGrid layoutGrid = layoutGridService.createLayoutGrid(document);
PDOptionalContentGroup layer = addLayerToDocument(pdDocument, false);
PDOptionalContentGroup visualLayoutParsingLayer = addLayerToDocument(pdDocument, true);
PDOptionalContentGroup layer = addLayerToDocument(pdDocument, layerVisibilityDefaultValue);
PDOptionalContentGroup tableExtractorLayer = addLayerToDocument(pdDocument, true);
PDFont font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
for (int pageNumber = 0; pageNumber < pdDocument.getNumberOfPages(); pageNumber++) {
@@ -84,7 +84,7 @@ public class ViewerDocumentService {
// We need to append to the content stream, otherwise the content could be overlapped by following content.
try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
contentStream.beginMarkedContent(COSName.OC, layer);
/*contentStream.beginMarkedContent(COSName.OC, layer);
contentStream.saveGraphicsState();
contentStream.setLineWidth(LINE_WIDTH);
@@ -124,14 +124,15 @@ public class ViewerDocumentService {
contentStream.endText();
}
contentStream.restoreGraphicsState();
contentStream.endMarkedContent();
contentStream.endMarkedContent();*/
contentStream.beginMarkedContent(COSName.OC, visualLayoutParsingLayer);
contentStream.beginMarkedContent(COSName.OC, tableExtractorLayer);
contentStream.saveGraphicsState();
contentStream.setLineWidth(LINE_WIDTH);
for (TableExtractorCells tableCells : extractedTableCells.get(pageNumber)) {
contentStream.setStrokingColor(new Color(0xB700FF));
log.info("drawn tableCell {} on page {}",tableCells, pageNumber);
contentStream.setStrokingColor(new Color(0xFF00DD));
contentStream.addRect((float) tableCells.getX0(), (float) tableCells.getY0(), (float) tableCells.getWidth(), (float) tableCells.getHeight());
contentStream.stroke();
contentStream.setFont(font, FONT_SIZE);