Merge branch 'RED-6725' into 'main'

Red 6725

See merge request fforesight/layout-parser!5
This commit is contained in:
Kilian Schüttler 2023-07-31 13:19:39 +02:00
commit 69c5f80c8c
18 changed files with 151 additions and 46 deletions

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
@ -21,6 +23,39 @@ public class DocumentStructure {
EntryData root;
/**
 * Keys under which table-level attributes (row and column count) are stored
 * in a string-to-string property map.
 *
 * <p>Pure constants holder: it is final and cannot be instantiated.
 */
public static final class TableProperties {

    public static final String NUMBER_OF_ROWS = "numberOfRows";
    public static final String NUMBER_OF_COLS = "numberOfCols";

    private TableProperties() {
        // constants only, no instances
    }
}
/**
 * Keys under which image attributes (transparency flag, image type, position)
 * are stored in a string-to-string property map.
 *
 * <p>Pure constants holder: it is final and cannot be instantiated.
 */
public static final class ImageProperties {

    public static final String TRANSPARENT = "transparent";
    public static final String IMAGE_TYPE = "imageType";
    public static final String POSITION = "position";

    private ImageProperties() {
        // constants only, no instances
    }
}
/**
 * Keys under which table-cell attributes (bounding box, row, column, header flag)
 * are stored in a string-to-string property map.
 *
 * <p>Pure constants holder: it is final and cannot be instantiated.
 */
public static final class TableCellProperties {

    public static final String B_BOX = "bBox";
    public static final String ROW = "row";
    public static final String COL = "col";
    public static final String HEADER = "header";

    private TableCellProperties() {
        // constants only, no instances
    }
}
/** Separator between the four components of a serialized rectangle. */
public static final String RECTANGLE_DELIMITER = ";";

/**
 * Parses a rectangle from its serialized form: x, y, width and height joined by
 * {@link #RECTANGLE_DELIMITER}.
 *
 * <p>A comma inside a component is treated as a decimal separator, so values that
 * were formatted under a locale using a decimal comma (for example
 * {@code "1,500000;2,000000;3,000000;4,000000"}) can still be read. As before, every
 * component is parsed, and components beyond the fourth are otherwise ignored.
 *
 * @param bBox the serialized rectangle, must not be {@code null}
 * @return a single-precision rectangle built from the first four components
 * @throws NullPointerException if {@code bBox} is {@code null}
 * @throws IllegalArgumentException if fewer than four components are present, or
 *         (as {@link NumberFormatException}) if a component is not a number
 */
public static Rectangle2D parseRectangle2D(String bBox) {
    java.util.Objects.requireNonNull(bBox, "bBox must not be null");
    List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
            // Float.parseFloat only accepts '.', normalise a locale-specific decimal comma
            .map(component -> Float.parseFloat(component.replace(',', '.')))
            .toList();
    if (floats.size() < 4) {
        throw new IllegalArgumentException(
                "Expected 4 rectangle components separated by '" + RECTANGLE_DELIMITER + "' but got " + floats.size() + ": '" + bBox + "'");
    }
    return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
}
public EntryData get(List<Integer> tocId) {
if (tocId.isEmpty()) {
@ -66,6 +101,7 @@ public class DocumentStructure {
Map<String, String> properties;
List<EntryData> children;
@Override
public String toString() {

View File

@ -52,12 +52,12 @@ public class LayoutParsingPipeline {
try (PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId())) {
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId());
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
}
TableServiceResponse tableServiceResponse = new TableServiceResponse();
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId());
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
}
Document documentGraph = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);

View File

@ -3,9 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.im
import java.util.HashMap;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Classification {
private Map<String, Float> probabilities = new HashMap<>();

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class FilterGeometry {
private ImageSize imageSize;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Filters {
private FilterGeometry geometry;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Geometry {
private float width;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ImageFormat {
private float quotient;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ImageMetadata {
private Classification classification;

View File

@ -6,9 +6,15 @@ import java.util.List;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ImageServiceResponse {
private String dossierId;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ImageSize {
private float quotient;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Position {
private float x1;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Probability {
private boolean unconfident;

View File

@ -1,8 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class PageInfo {
private int number;

View File

@ -3,12 +3,12 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.ta
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@RequiredArgsConstructor
public class PdfTableCell {
private float x0;

View File

@ -1,10 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableCells {
private float x0;

View File

@ -3,9 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.ta
import java.util.ArrayList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableData {
private PageInfo pageInfo;

View File

@ -3,9 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.ta
import java.util.ArrayList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableServiceResponse {
private String dossierId;

View File

@ -1,36 +1,23 @@
package com.knecon.fforesight.service.layoutparser.processor.services.mapper;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
public class PropertiesMapper {
public static final String TRANSPARENT = "transparent";
public static final String IMAGE_TYPE = "imageType";
public static final String POSITION = "position";
public static final String ROW = "row";
public static final String COL = "col";
public static final String HEADER = "header";
public static final String B_BOX = "bBox";
public static final String NUMBER_OF_ROWS = "numberOfRows";
public static final String NUMBER_OF_COLS = "numberOfCols";
/**
 * Serializes the image type, the transparency flag and the position of an image
 * into a string-to-string property map.
 *
 * @param image the image node whose attributes are read
 * @return a newly created map holding the serialized image attributes
 */
public static Map<String, String> buildImageProperties(Image image) {
Map<String, String> properties = new HashMap<>();
properties.put(IMAGE_TYPE, image.getImageType().toString());
properties.put(TRANSPARENT, String.valueOf(image.isTransparent()));
properties.put(POSITION, RectangleTransformations.toString(image.getPosition()));
properties.put(DocumentStructure.ImageProperties.IMAGE_TYPE, image.getImageType().toString());
properties.put(DocumentStructure.ImageProperties.TRANSPARENT, String.valueOf(image.isTransparent()));
properties.put(DocumentStructure.ImageProperties.POSITION, toString(image.getPosition()));
return properties;
}
@ -38,15 +25,15 @@ public class PropertiesMapper {
/**
 * Serializes row, column, header flag and bounding box of a table cell into a
 * string-to-string property map.
 *
 * @param tableCell the cell whose attributes are read
 * @return a newly created map holding the serialized cell attributes
 * @throws IllegalArgumentException if the cell reports more than one page or more
 *         than one bounding-box entry
 */
public static Map<String, String> buildTableCellProperties(TableCell tableCell) {
Map<String, String> properties = new HashMap<>();
properties.put(ROW, String.valueOf(tableCell.getRow()));
properties.put(COL, String.valueOf(tableCell.getCol()));
properties.put(HEADER, String.valueOf(tableCell.isHeader()));
properties.put(DocumentStructure.TableCellProperties.ROW, String.valueOf(tableCell.getRow()));
properties.put(DocumentStructure.TableCellProperties.COL, String.valueOf(tableCell.getCol()));
properties.put(DocumentStructure.TableCellProperties.HEADER, String.valueOf(tableCell.isHeader()));
// Only one bounding box string is stored, so a cell spread over several pages cannot be represented.
if (tableCell.getPages().size() > 1 || tableCell.getBBox().keySet().size() > 1) {
throw new IllegalArgumentException("TableCell can only occur on a single page!");
}
// NOTE(review): findFirst().get() fails with NoSuchElementException when getPages() is empty - confirm a cell always has a page.
String bBoxString = RectangleTransformations.toString(tableCell.getBBox().get(tableCell.getPages().stream().findFirst().get()));
properties.put(B_BOX, bBoxString);
String bBoxString = toString(tableCell.getBBox().get(tableCell.getPages().stream().findFirst().get()));
properties.put(DocumentStructure.TableCellProperties.B_BOX, bBoxString);
return properties;
}
@ -55,33 +42,33 @@ public class PropertiesMapper {
/**
 * Serializes the number of rows and columns of a table into a string-to-string
 * property map.
 *
 * @param table the table whose dimensions are read
 * @return a newly created map holding the serialized table dimensions
 */
public static Map<String, String> buildTableProperties(Table table) {
Map<String, String> properties = new HashMap<>();
properties.put(NUMBER_OF_ROWS, String.valueOf(table.getNumberOfRows()));
properties.put(NUMBER_OF_COLS, String.valueOf(table.getNumberOfCols()));
properties.put(DocumentStructure.TableProperties.NUMBER_OF_ROWS, String.valueOf(table.getNumberOfRows()));
properties.put(DocumentStructure.TableProperties.NUMBER_OF_COLS, String.valueOf(table.getNumberOfCols()));
return properties;
}
/**
 * Reads image type, transparency flag and position from a property map and sets
 * them on the given image builder.
 *
 * <p>{@link Boolean#parseBoolean(String)} yields {@code false} for a missing or
 * non-"true" transparency value rather than failing.
 *
 * @param properties the serialized image attributes
 * @param builder the builder that receives the parsed values
 */
public static void parseImageProperties(Map<String, String> properties, Image.ImageBuilder builder) {
builder.imageType(parseImageType(properties.get(IMAGE_TYPE)));
builder.transparent(Boolean.parseBoolean(properties.get(TRANSPARENT)));
builder.position(parseRectangle2D(properties.get(POSITION)));
builder.imageType(parseImageType(properties.get(DocumentStructure.ImageProperties.IMAGE_TYPE)));
builder.transparent(Boolean.parseBoolean(properties.get(DocumentStructure.ImageProperties.TRANSPARENT)));
builder.position(DocumentStructure.parseRectangle2D(properties.get(DocumentStructure.ImageProperties.POSITION)));
}
/**
 * Reads row, column, header flag and bounding box from a property map and sets
 * them on the given table-cell builder.
 *
 * @param properties the serialized cell attributes
 * @param builder the builder that receives the parsed values
 * @throws NumberFormatException if the row or column entry is missing or not an integer
 */
public static void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder builder) {
builder.row(Integer.parseInt(properties.get(ROW)));
builder.col(Integer.parseInt(properties.get(COL)));
builder.header(Boolean.parseBoolean(properties.get(HEADER)));
builder.bBox(parseRectangle2D(properties.get(B_BOX)));
builder.row(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.ROW)));
builder.col(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.COL)));
builder.header(Boolean.parseBoolean(properties.get(DocumentStructure.TableCellProperties.HEADER)));
builder.bBox(DocumentStructure.parseRectangle2D(properties.get(DocumentStructure.TableCellProperties.B_BOX)));
}
/**
 * Reads the number of rows and columns from a property map and sets them on the
 * given table builder.
 *
 * @param properties the serialized table attributes
 * @param builder the builder that receives the parsed values
 * @throws NumberFormatException if either entry is missing or not an integer
 */
public static void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {
builder.numberOfRows(Integer.parseInt(properties.get(NUMBER_OF_ROWS)));
builder.numberOfCols(Integer.parseInt(properties.get(NUMBER_OF_COLS)));
builder.numberOfRows(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_ROWS)));
builder.numberOfCols(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_COLS)));
}
@ -99,14 +86,14 @@ public class PropertiesMapper {
/**
 * Formats a rectangle as its x, y, width and height, each rendered with {@code %f}
 * and joined by a delimiter.
 *
 * <p>The numbers are formatted with {@link java.util.Locale#ROOT} so the decimal
 * separator is always {@code '.'}. Without an explicit locale, {@code String.format}
 * uses the default format locale and may emit a decimal comma, which
 * {@link Float#parseFloat(String)} cannot read back.
 *
 * @param rectangle2D the rectangle to serialize
 * @return the four components as one delimited string
 */
public static String toString(Rectangle2D rectangle2D) {
return String.format(java.util.Locale.ROOT, "%f,%f,%f,%f", rectangle2D.getX(), rectangle2D.getY(), rectangle2D.getWidth(), rectangle2D.getHeight());
}
public static Rectangle2D parseRectangle2D(String bBox) {
List<Float> floats = Arrays.stream(bBox.split(",")).map(Float::parseFloat).toList();
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
return String.format(java.util.Locale.ROOT, "%f%s%f%s%f%s%f",
rectangle2D.getX(),
DocumentStructure.RECTANGLE_DELIMITER,
rectangle2D.getY(),
DocumentStructure.RECTANGLE_DELIMITER,
rectangle2D.getWidth(),
DocumentStructure.RECTANGLE_DELIMITER,
rectangle2D.getHeight());
}
}