Merge branch 'RED-6725' into 'main'
Red 6725 See merge request fforesight/layout-parser!5
This commit is contained in:
commit
69c5f80c8c
@ -1,5 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
@ -21,6 +23,39 @@ public class DocumentStructure {
|
||||
EntryData root;
|
||||
|
||||
|
||||
public static class TableProperties {
|
||||
|
||||
public static final String NUMBER_OF_ROWS = "numberOfRows";
|
||||
public static final String NUMBER_OF_COLS = "numberOfCols";
|
||||
|
||||
}
|
||||
|
||||
public static class ImageProperties {
|
||||
|
||||
public static final String TRANSPARENT = "transparent";
|
||||
public static final String IMAGE_TYPE = "imageType";
|
||||
public static final String POSITION = "position";
|
||||
|
||||
}
|
||||
|
||||
public static class TableCellProperties {
|
||||
|
||||
public static final String B_BOX = "bBox";
|
||||
public static final String ROW = "row";
|
||||
public static final String COL = "col";
|
||||
public static final String HEADER = "header";
|
||||
|
||||
}
|
||||
|
||||
public static final String RECTANGLE_DELIMITER = ";";
|
||||
|
||||
public static Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER)).map(Float::parseFloat).toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
}
|
||||
|
||||
|
||||
public EntryData get(List<Integer> tocId) {
|
||||
|
||||
if (tocId.isEmpty()) {
|
||||
@ -66,6 +101,7 @@ public class DocumentStructure {
|
||||
Map<String, String> properties;
|
||||
List<EntryData> children;
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
|
||||
@ -52,12 +52,12 @@ public class LayoutParsingPipeline {
|
||||
try (PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId())) {
|
||||
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
||||
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
|
||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId());
|
||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
|
||||
}
|
||||
|
||||
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
||||
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
|
||||
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId());
|
||||
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
|
||||
}
|
||||
|
||||
Document documentGraph = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
|
||||
|
||||
@ -3,9 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.im
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class Classification {
|
||||
|
||||
private Map<String, Float> probabilities = new HashMap<>();
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class FilterGeometry {
|
||||
|
||||
private ImageSize imageSize;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class Filters {
|
||||
|
||||
private FilterGeometry geometry;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class Geometry {
|
||||
|
||||
private float width;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ImageFormat {
|
||||
|
||||
private float quotient;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ImageMetadata {
|
||||
|
||||
private Classification classification;
|
||||
|
||||
@ -6,9 +6,15 @@ import java.util.List;
|
||||
import com.fasterxml.jackson.annotation.JsonAlias;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ImageServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ImageSize {
|
||||
|
||||
private float quotient;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class Position {
|
||||
|
||||
private float x1;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.image;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class Probability {
|
||||
|
||||
private boolean unconfident;
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class PageInfo {
|
||||
|
||||
private int number;
|
||||
|
||||
@ -3,12 +3,12 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.ta
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@RequiredArgsConstructor
|
||||
public class PdfTableCell {
|
||||
|
||||
private float x0;
|
||||
|
||||
@ -1,10 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class TableCells {
|
||||
|
||||
private float x0;
|
||||
|
||||
@ -3,9 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.ta
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class TableData {
|
||||
|
||||
private PageInfo pageInfo;
|
||||
|
||||
@ -3,9 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.ta
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class TableServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
|
||||
@ -1,36 +1,23 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.mapper;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
|
||||
public class PropertiesMapper {
|
||||
|
||||
public static final String TRANSPARENT = "transparent";
|
||||
public static final String IMAGE_TYPE = "imageType";
|
||||
public static final String POSITION = "position";
|
||||
public static final String ROW = "row";
|
||||
public static final String COL = "col";
|
||||
public static final String HEADER = "header";
|
||||
public static final String B_BOX = "bBox";
|
||||
public static final String NUMBER_OF_ROWS = "numberOfRows";
|
||||
public static final String NUMBER_OF_COLS = "numberOfCols";
|
||||
|
||||
|
||||
public static Map<String, String> buildImageProperties(Image image) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(IMAGE_TYPE, image.getImageType().toString());
|
||||
properties.put(TRANSPARENT, String.valueOf(image.isTransparent()));
|
||||
properties.put(POSITION, RectangleTransformations.toString(image.getPosition()));
|
||||
properties.put(DocumentStructure.ImageProperties.IMAGE_TYPE, image.getImageType().toString());
|
||||
properties.put(DocumentStructure.ImageProperties.TRANSPARENT, String.valueOf(image.isTransparent()));
|
||||
properties.put(DocumentStructure.ImageProperties.POSITION, toString(image.getPosition()));
|
||||
return properties;
|
||||
}
|
||||
|
||||
@ -38,15 +25,15 @@ public class PropertiesMapper {
|
||||
public static Map<String, String> buildTableCellProperties(TableCell tableCell) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(ROW, String.valueOf(tableCell.getRow()));
|
||||
properties.put(COL, String.valueOf(tableCell.getCol()));
|
||||
properties.put(HEADER, String.valueOf(tableCell.isHeader()));
|
||||
properties.put(DocumentStructure.TableCellProperties.ROW, String.valueOf(tableCell.getRow()));
|
||||
properties.put(DocumentStructure.TableCellProperties.COL, String.valueOf(tableCell.getCol()));
|
||||
properties.put(DocumentStructure.TableCellProperties.HEADER, String.valueOf(tableCell.isHeader()));
|
||||
|
||||
if (tableCell.getPages().size() > 1 || tableCell.getBBox().keySet().size() > 1) {
|
||||
throw new IllegalArgumentException("TableCell can only occur on a single page!");
|
||||
}
|
||||
String bBoxString = RectangleTransformations.toString(tableCell.getBBox().get(tableCell.getPages().stream().findFirst().get()));
|
||||
properties.put(B_BOX, bBoxString);
|
||||
String bBoxString = toString(tableCell.getBBox().get(tableCell.getPages().stream().findFirst().get()));
|
||||
properties.put(DocumentStructure.TableCellProperties.B_BOX, bBoxString);
|
||||
|
||||
return properties;
|
||||
}
|
||||
@ -55,33 +42,33 @@ public class PropertiesMapper {
|
||||
public static Map<String, String> buildTableProperties(Table table) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(NUMBER_OF_ROWS, String.valueOf(table.getNumberOfRows()));
|
||||
properties.put(NUMBER_OF_COLS, String.valueOf(table.getNumberOfCols()));
|
||||
properties.put(DocumentStructure.TableProperties.NUMBER_OF_ROWS, String.valueOf(table.getNumberOfRows()));
|
||||
properties.put(DocumentStructure.TableProperties.NUMBER_OF_COLS, String.valueOf(table.getNumberOfCols()));
|
||||
return properties;
|
||||
}
|
||||
|
||||
|
||||
public static void parseImageProperties(Map<String, String> properties, Image.ImageBuilder builder) {
|
||||
|
||||
builder.imageType(parseImageType(properties.get(IMAGE_TYPE)));
|
||||
builder.transparent(Boolean.parseBoolean(properties.get(TRANSPARENT)));
|
||||
builder.position(parseRectangle2D(properties.get(POSITION)));
|
||||
builder.imageType(parseImageType(properties.get(DocumentStructure.ImageProperties.IMAGE_TYPE)));
|
||||
builder.transparent(Boolean.parseBoolean(properties.get(DocumentStructure.ImageProperties.TRANSPARENT)));
|
||||
builder.position(DocumentStructure.parseRectangle2D(properties.get(DocumentStructure.ImageProperties.POSITION)));
|
||||
}
|
||||
|
||||
|
||||
public static void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder builder) {
|
||||
|
||||
builder.row(Integer.parseInt(properties.get(ROW)));
|
||||
builder.col(Integer.parseInt(properties.get(COL)));
|
||||
builder.header(Boolean.parseBoolean(properties.get(HEADER)));
|
||||
builder.bBox(parseRectangle2D(properties.get(B_BOX)));
|
||||
builder.row(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.ROW)));
|
||||
builder.col(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.COL)));
|
||||
builder.header(Boolean.parseBoolean(properties.get(DocumentStructure.TableCellProperties.HEADER)));
|
||||
builder.bBox(DocumentStructure.parseRectangle2D(properties.get(DocumentStructure.TableCellProperties.B_BOX)));
|
||||
}
|
||||
|
||||
|
||||
public static void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {
|
||||
|
||||
builder.numberOfRows(Integer.parseInt(properties.get(NUMBER_OF_ROWS)));
|
||||
builder.numberOfCols(Integer.parseInt(properties.get(NUMBER_OF_COLS)));
|
||||
builder.numberOfRows(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_ROWS)));
|
||||
builder.numberOfCols(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_COLS)));
|
||||
}
|
||||
|
||||
|
||||
@ -99,14 +86,14 @@ public class PropertiesMapper {
|
||||
|
||||
public static String toString(Rectangle2D rectangle2D) {
|
||||
|
||||
return String.format("%f,%f,%f,%f", rectangle2D.getX(), rectangle2D.getY(), rectangle2D.getWidth(), rectangle2D.getHeight());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(",")).map(Float::parseFloat).toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
return String.format("%f%s%f%s%f%s%f",
|
||||
rectangle2D.getX(),
|
||||
DocumentStructure.RECTANGLE_DELIMITER,
|
||||
rectangle2D.getY(),
|
||||
DocumentStructure.RECTANGLE_DELIMITER,
|
||||
rectangle2D.getWidth(),
|
||||
DocumentStructure.RECTANGLE_DELIMITER,
|
||||
rectangle2D.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user