Merge branch 'RED-8106' into 'main'

RED-8106: Make DocumentData serializable

See merge request fforesight/layout-parser!95
This commit is contained in:
Dominique Eifländer 2023-12-22 13:33:02 +01:00
commit 368a75e985
6 changed files with 21 additions and 11 deletions

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.io.Serializable;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
@ -12,7 +14,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor @AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@Schema(description = "Object containing the complete document layout parsing information. It is split into 4 categories, structure, text, positions and pages: " + "The document tree structure of SemanticNodes such as Section, Paragraph, Headline, etc. " + "The text, which is stored as separate blocks of data. " + "The text positions, which are also stored as separate blocks. The Blocks are equal to the text blocks in length and order. " + "The page information.") @Schema(description = "Object containing the complete document layout parsing information. It is split into 4 categories, structure, text, positions and pages: " + "The document tree structure of SemanticNodes such as Section, Paragraph, Headline, etc. " + "The text, which is stored as separate blocks of data. " + "The text positions, which are also stored as separate blocks. The Blocks are equal to the text blocks in length and order. " + "The page information.")
public class DocumentData { public class DocumentData implements Serializable {
@Schema(description = "Contains information about the document's pages.") @Schema(description = "Contains information about the document's pages.")
DocumentPage[] documentPages; DocumentPage[] documentPages;

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.io.Serializable;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
@ -14,7 +16,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor @AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
@Schema(description = "Object containing information about the document's pages.") @Schema(description = "Object containing information about the document's pages.")
public class DocumentPage { public class DocumentPage implements Serializable {
@Schema(description = "The page number, starting with 1.") @Schema(description = "The page number, starting with 1.")
int number; int number;

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.io.Serializable;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
@ -14,7 +16,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor @AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
@Schema(description = "Object containing text positional information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.") @Schema(description = "Object containing text positional information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.")
public class DocumentPositionData { public class DocumentPositionData implements Serializable {
@Schema(description = "Identifier of the text block.") @Schema(description = "Identifier of the text block.")
Long id; Long id;

View File

@ -1,6 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.awt.geom.Rectangle2D; import java.awt.geom.Rectangle2D;
import java.io.Serializable;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -20,14 +21,13 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor @AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
@Schema(description = "Object containing information about the parsed tree structure of the SemanticNodes, such as Section, Paragraph, Headline etc inside of the document.") @Schema(description = "Object containing information about the parsed tree structure of the SemanticNodes, such as Section, Paragraph, Headline etc inside of the document.")
public class DocumentStructure { public class DocumentStructure implements Serializable {
@Schema(description = "The root EntryData represents the Document.") @Schema(description = "The root EntryData represents the Document.")
EntryData root; EntryData root;
@Schema(description = "Object containing the extra field names, a table has in its properties field.") @Schema(description = "Object containing the extra field names, a table has in its properties field.")
public static class TableProperties { public static class TableProperties implements Serializable {
public static final String NUMBER_OF_ROWS = "numberOfRows"; public static final String NUMBER_OF_ROWS = "numberOfRows";
public static final String NUMBER_OF_COLS = "numberOfCols"; public static final String NUMBER_OF_COLS = "numberOfCols";
@ -35,7 +35,7 @@ public class DocumentStructure {
} }
@Schema(description = "Object containing the extra field names, an Image has in its properties field.") @Schema(description = "Object containing the extra field names, an Image has in its properties field.")
public static class ImageProperties { public static class ImageProperties implements Serializable {
public static final String TRANSPARENT = "transparent"; public static final String TRANSPARENT = "transparent";
public static final String IMAGE_TYPE = "imageType"; public static final String IMAGE_TYPE = "imageType";
@ -45,7 +45,7 @@ public class DocumentStructure {
} }
@Schema(description = "Object containing the extra field names, a table cell has in its properties field.") @Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
public static class TableCellProperties { public static class TableCellProperties implements Serializable {
public static final String B_BOX = "bBox"; public static final String B_BOX = "bBox";
public static final String ROW = "row"; public static final String ROW = "row";
@ -56,6 +56,7 @@ public class DocumentStructure {
public static final String RECTANGLE_DELIMITER = ";"; public static final String RECTANGLE_DELIMITER = ";";
public static Rectangle2D parseRectangle2D(String bBox) { public static Rectangle2D parseRectangle2D(String bBox) {
List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER)).map(Float::parseFloat).toList(); List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER)).map(Float::parseFloat).toList();
@ -100,7 +101,7 @@ public class DocumentStructure {
@AllArgsConstructor @AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
@Schema(description = "Object containing information of a SemanticNode and also structuring the layout with children.") @Schema(description = "Object containing information of a SemanticNode and also structuring the layout with children.")
public static class EntryData { public static class EntryData implements Serializable {
@Schema(description = "Type of the semantic node.", allowableValues = {"DOCUMENT", "SECTION", "PARAGRAPH", "HEADLINE", "TABLE", "TABLE_CELL", "HEADER", "FOOTER", "IMAGE"}) @Schema(description = "Type of the semantic node.", allowableValues = {"DOCUMENT", "SECTION", "PARAGRAPH", "HEADLINE", "TABLE", "TABLE_CELL", "HEADER", "FOOTER", "IMAGE"})
NodeType type; NodeType type;

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.io.Serializable;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
@ -14,7 +16,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor @AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
@Schema(description = "Object containing text information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.") @Schema(description = "Object containing text information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.")
public class DocumentTextData { public class DocumentTextData implements Serializable {
@Schema(description = "Identifier of the text block.") @Schema(description = "Identifier of the text block.")
Long id; Long id;

View File

@ -1,8 +1,9 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.io.Serializable;
import java.util.Locale; import java.util.Locale;
public enum NodeType { public enum NodeType implements Serializable {
DOCUMENT, DOCUMENT,
SECTION, SECTION,
HEADLINE, HEADLINE,