RED-7375 table extractor prototype

This commit is contained in:
yhampe 2024-01-30 11:59:18 +01:00
parent b4e5f2da2f
commit 0c3b910088
4 changed files with 5 additions and 49 deletions

View File

@ -93,7 +93,6 @@ public class LayoutParsingPipeline {
TableExtractorResponse tableExtractorResponse = new TableExtractorResponse();
if (layoutParsingRequest.tableExtractorFileId().isPresent()) {
log.info("TABLEEXTRACTORRESPONSE:"+tableExtractorResponse);
tableExtractorResponse = layoutParsingStorageService.getExtractedTableFile(layoutParsingRequest.tableExtractorFileId().get());
}

View File

@ -20,8 +20,10 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Service
@Slf4j
@RequiredArgsConstructor
public class TableExtractorResponseAdapter {
@ -48,6 +50,7 @@ public class TableExtractorResponseAdapter {
tableCells.setWidth(tableCells.getX1()- tableCells.getX0());
tableCells.setHeight(tableCells.getY1()- tableCells.getY0());
tableCells.setLabel(t.getTable().getLabel());
log.info("Parsed table cell {}",tableCells);
parsedTableCells.add(tableCells);
t.getObjects().forEach(o -> {
TableExtractorCells objectCell = new TableExtractorCells();
@ -58,6 +61,7 @@ public class TableExtractorResponseAdapter {
objectCell.setWidth(objectCell.getX1()- objectCell.getX0());
objectCell.setHeight(objectCell.getY1()- objectCell.getY0());
objectCell.setLabel(o.getLabel());
log.info("Parsed object cell {}",objectCell);
parsedTableCells.add(objectCell);
});
});

View File

@ -14,11 +14,6 @@ import lombok.NoArgsConstructor;
/**
 * Field-only holder for table extractor output: page metadata, image/PDF
 * dimensions and the list of extracted tables.
 *
 * NOTE(review): no accessors are declared here; they are presumably generated
 * by Lombok annotations on the class declaration - confirm.
 * NOTE(review): the snake_case field names presumably mirror the property names
 * of the external table extractor payload - verify against the producer before
 * renaming anything.
 */
public class TableExtractorData {
// NOTE(review): presumably identifies the page this entry belongs to; whether it is 0- or 1-based is not visible here - confirm.
private int page_number;
// NOTE(review): unit (degrees vs. quadrant index) is not visible here - confirm.
private int page_rotation;
// NOTE(review): "heigth" is a misspelling of "height". Renaming it would change the generated
// accessor name and probably the serialized property name, so it is left untouched here.
private int image_heigth;
private int image_width;
// NOTE(review): image_* are ints and pdf_* are floats; presumably pixel vs. PDF user-space units - confirm.
private float pdf_height;
private float pdf_width;
private int dpi;
// NOTE(review): meaning of this int (index? id?) cannot be determined from this class - confirm.
private int image;
// Extracted tables carried by this entry.
private List<ExtractedTable> tables;
}

View File

@ -66,7 +66,6 @@ public class ViewerDocumentService {
// PDDocument.save() is very slow, since it actually traverses the entire pdf and writes a new one.
// If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones by using saveIncremental it's very fast.
Set<COSDictionary> dictionariesToUpdate = new HashSet<>();
PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate, layerVisibilityDefaultValue);
PDOptionalContentGroup tableExtractorLayer = addLayerToDocument(pdDocument, dictionariesToUpdate, true);
@ -87,47 +86,6 @@ public class ViewerDocumentService {
// We need to append to the content stream, otherwise the content could be overlapped by following content.
try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
contentStream.beginMarkedContent(COSName.OC, layer);
contentStream.saveGraphicsState();
contentStream.setLineWidth(LINE_WIDTH);
for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) {
contentStream.setStrokingColor(coloredLine.color());
contentStream.moveTo((float) coloredLine.line().getX1(), (float) coloredLine.line().getY1());
contentStream.lineTo((float) coloredLine.line().getX2(), (float) coloredLine.line().getY2());
contentStream.stroke();
}
for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) {
contentStream.setStrokingColor(coloredRectangle.color());
Rectangle2D r = coloredRectangle.rectangle2D();
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
contentStream.stroke();
}
for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) {
contentStream.setNonStrokingColor(filledRectangle.color());
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
graphicsState.setNonStrokingAlphaConstant(filledRectangle.alpha());
contentStream.setGraphicsStateParameters(graphicsState);
Rectangle2D r = filledRectangle.rectangle2D();
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
contentStream.fill();
}
for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) {
contentStream.setFont(font, FONT_SIZE);
contentStream.beginText();
Matrix textMatrix = new Matrix((float) textDeRotationMatrix.getScaleX(),
(float) textDeRotationMatrix.getShearX(),
(float) textDeRotationMatrix.getShearY(),
(float) textDeRotationMatrix.getScaleY(),
(float) placedText.lineStart().getX(),
(float) placedText.lineStart().getY());
textMatrix.translate(-((font.getStringWidth(placedText.text()) / 1000) * FONT_SIZE + (2 * LINE_WIDTH) + 4), -FONT_SIZE);
contentStream.setTextMatrix(textMatrix);
contentStream.showText(placedText.text());
contentStream.endText();
}
contentStream.restoreGraphicsState();
contentStream.endMarkedContent();
contentStream.beginMarkedContent(COSName.OC, tableExtractorLayer);
contentStream.saveGraphicsState();