From bcf0bcbaf42ecbf47ccf47f8b46870add87eaf8f Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 24 Aug 2023 18:37:47 +0200 Subject: [PATCH] Added some documentation --- .gitlab-ci.yml | 2 +- README.md | 88 +++++++++++++++++++ .../build.gradle.kts | 4 + .../api/data/redaction/DocumentData.java | 6 ++ .../api/data/redaction/DocumentPage.java | 6 ++ .../data/redaction/DocumentPositionData.java | 5 ++ .../api/data/redaction/DocumentStructure.java | 13 +++ .../api/data/redaction/DocumentTextData.java | 11 ++- .../data/redaction/SimplifiedSectionText.java | 4 + .../api/data/redaction/SimplifiedText.java | 4 + .../internal/api/data/taas/ParagraphData.java | 9 ++ .../internal/api/data/taas/Range.java | 3 + .../api/data/taas/ResearchDocumentData.java | 5 ++ .../internal/api/data/taas/RowData.java | 5 ++ .../api/data/taas/StructureObject.java | 8 ++ .../internal/api/data/taas/TableData.java | 5 ++ .../api/queue/LayoutParsingRequest.java | 1 + 17 files changed, 176 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e9d62b3..813600e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,4 +18,4 @@ deploy: rules: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH - if: $CI_COMMIT_BRANCH =~ /^release/ - - if: $CI_COMMIT_TAG \ No newline at end of file + - if: $CI_COMMIT_TAG diff --git a/README.md b/README.md index 8b13789..5a44c75 100644 --- a/README.md +++ b/README.md @@ -1 +1,89 @@ +# PDF Layout Parser Micro-Service: layout-parser +## Introduction +The layout-parser micro-service is a powerful tool designed to efficiently extract structured information from PDF documents. Written in Java and utilizing Spring Boot 3, Apache PDFBox, and RabbitMQ, this micro-service excels at parsing PDFs and organizing their content into a meaningful and coherent layout structure. Notably, the layout-parser micro-service distinguishes itself by relying solely on advanced algorithms, rather than machine learning techniques. 
+ +### Key Steps in the PDF Layout Parsing Process: + +* **Text Position Extraction:** +The micro-service leverages Apache PDFBox to extract precise text positions for each individual character within the PDF document. + +* **Word Segmentation and Text Block Formation:** +Employing an array of diverse algorithms, the micro-service initially identifies and segments words, creating distinct text blocks. + +* **Text Block Classification:** +The segmented text blocks are then subjected to classification algorithms. These algorithms categorize the text blocks based on their content and visual properties, distinguishing between sections, subsections, headlines, paragraphs, images, tables, table cells, headers, and footers. + +* **Layout Coherence Establishment:** +The classified text blocks are subsequently orchestrated into a cohesive layout structure. This process involves arranging sections, subsections, paragraphs, images, and other elements in a logical and structured manner. + +* **Output Generation in Various Formats:** +Once the layout structure is established, the micro-service generates output in multiple formats. These formats are designed for seamless integration with downstream micro-services. The supported formats include JSON, XML, and others, ensuring flexibility in downstream data consumption. + +### Optional Enhancements: + +* **ML-Based Table Extraction:** +For enhanced results, users have the option to incorporate machine learning-based table extraction. This feature can be activated by providing ML-generated results as a JSON file, which are then integrated seamlessly into the layout structure. + +* **Image Classification using ML:** +Additionally, for more accurate image classification, users can optionally feed ML-generated image classification results into the micro-service. Similar to the table extraction option, the micro-service processes the pre-parsed results in JSON format, thus optimizing the accuracy of image content identification. 
+ +In conclusion, the layout-parser micro-service is a versatile PDF layout parsing solution crafted entirely around advanced algorithms, without reliance on machine learning. It proficiently extracts text positions, segments content into meaningful blocks, classifies these blocks, arranges them coherently, and outputs structured data for downstream micro-services. Optional integration with ML-generated table extractions and image classifications further enhances its capabilities. + + + + + + +## Installation + +### Prerequisites + +Before building and using the layout-parser micro-service, please ensure you have the following software and tools installed: + +* Java Development Kit (JDK) 17 or later +* Gradle build tool (preinstalled) +## Build and Test +To build and test the micro-service, follow these steps: + +### Clone the Repository: + + +```bash +git clone ssh://git@git.knecon.com:22222/fforesight/layout-parser.git +cd layout-parser +``` +### Build the Project: +Use the following command to build the project using Gradle: + +``` +gradle clean build +``` +### Run Tests: +Run the test suite using the following command: +``` +gradle test +``` +## Building a Custom Docker Image +To create a custom Docker image for the layout-parser micro-service, execute the provided script: + +### Ensure Docker is Installed: +Ensure that Docker is installed and running on your system. + +### Run the Image Building Script: +Execute the publish-custom-image script in the project directory: + +``` +./publish-custom-image +``` +## Publishing to Internal Maven Repository +To publish the layout-parser micro-service to your internal Maven repository, execute the following command: + +``` +gradle -Pversion=buildVersion publish +``` +Replace buildVersion with the desired version number. + +## Additional Notes +Make sure to configure any necessary application properties before deploying the micro-service.
+For advanced usage and configurations, refer to Kilian or Dom or preferably the source code. diff --git a/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts b/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts index 8457ae1..17fe195 100644 --- a/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts +++ b/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts @@ -4,3 +4,7 @@ plugins { } description = "layoutparser-service-internal-api" + +dependencies { + implementation("io.swagger.core.v3:swagger-annotations:2.2.15") +} diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java index 7dcd7cb..b6cb371 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java @@ -1,5 +1,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -10,11 +11,16 @@ import lombok.experimental.FieldDefaults; @Builder @AllArgsConstructor @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +@Schema(description = "Object containing the complete document layout parsing information. It is split into 4 categories, structure, text, positions and pages: " + "The document tree structure of SemanticNodes such as Section, Paragraph, Headline, etc. " + "The text, which is stored as separate blocks of data. 
" + "The text positions, which are also stored as separate blocks. The Blocks are equal to the text blocks in length and order. " + "The page information.") public class DocumentData { + @Schema(description = "Contains information about the document's pages.") DocumentPage[] documentPages; + @Schema(description = "Contains information about the document's text.") DocumentTextData[] documentTextData; + @Schema(description = "Contains information about the document's text positions.") DocumentPositionData[] documentPositions; + @Schema(description = "Contains information about the document's semantic structure.") DocumentStructure documentStructure; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java index 262ea55..c3f8558 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java @@ -1,5 +1,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,11 +13,16 @@ import lombok.experimental.FieldDefaults; @NoArgsConstructor @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) +@Schema(description = "Object containing information about the document's pages.") public class DocumentPage { + @Schema(description = "The page number, starting with 1.") int number; + @Schema(description = "The page height in PDF user units.", example = "792") int height; 
+ @Schema(description = "The page width in PDF user units.", example = "694") int width; + @Schema(description = "The page rotation as specified by the PDF.", example = "90", allowableValues = {"0", "90", "180", "270"}) int rotation; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java index aa60b9e..25bcd03 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java @@ -1,5 +1,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,10 +13,14 @@ import lombok.experimental.FieldDefaults; @NoArgsConstructor @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) +@Schema(description = "Object containing text positional information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.") public class DocumentPositionData { + @Schema(description = "Identifier of the text block.") Long id; + @Schema(description = "For each string coordinate in the search text of the text block, the array contains an entry relating the string coordinate to the position coordinate. 
This is required due to the text and position coordinates not being equal.") int[] stringIdxToPositionIdx; + @Schema(description = "The bounding box for each glyph as a rectangle. This matrix is of size (n,4), where n is the number of glyphs in the text block. The second dimension specifies the rectangle with the value x, y, width, height, with x, y specifying the lower left corner. In order to access this information, the stringIdxToPositionIdx array must be used to transform the coordinates.") float[][] positions; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java index 7768ee6..6852896 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Stream; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -18,11 +19,14 @@ import lombok.experimental.FieldDefaults; @NoArgsConstructor @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) +@Schema(description = "Object containing information about the parsed tree structure of the SemanticNodes, such as Section, Paragraph, Headline etc inside of the document.") public class DocumentStructure { + @Schema(description = "The root EntryData represents the Document.") EntryData root; + @Schema(description = "Object containing the extra field names, a table has in its properties 
field.") public static class TableProperties { public static final String NUMBER_OF_ROWS = "numberOfRows"; @@ -30,6 +34,7 @@ public class DocumentStructure { } + @Schema(description = "Object containing the extra field names, an Image has in its properties field.") public static class ImageProperties { public static final String TRANSPARENT = "transparent"; @@ -39,6 +44,7 @@ public class DocumentStructure { } + @Schema(description = "Object containing the extra field names, a table cell has in its properties field.") public static class TableCellProperties { public static final String B_BOX = "bBox"; @@ -93,13 +99,20 @@ public class DocumentStructure { @NoArgsConstructor @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) + @Schema(description = "Object containing information of a SemanticNode and also structuring the layout with children.") public static class EntryData { + @Schema(description = "Type of the semantic node.", allowableValues = {"DOCUMENT", "SECTION", "PARAGRAPH", "HEADLINE", "TABLE", "TABLE_CELL", "HEADER", "FOOTER", "IMAGE"}) NodeType type; + @Schema(description = "Specifies the position in the parsed tree structure.", example = "[1, 0, 2]") int[] treeId; + @Schema(description = "Specifies the text block IDs associated with this semantic node. The value should be joined with the DocumentTextData/DocumentPositionData. Is empty, if no text block is directly associated with this semantic node. Only Paragraph, Headline, Header or Footer is directly associated with a text block.", example = "[1]") Long[] atomicBlockIds; + @Schema(description = "Specifies the pages this semantic node appears on. The value should be joined with the PageData.", example = "[1, 2, 3]") Long[] pageNumbers; + @Schema(description = "Some semantic nodes have additional information, this information is stored in this Map. 
The extra fields are specified by the Properties subclasses.", example = "For a Table: {\"numberOfRows\": 3, \"numberOfCols\": 4}") Map properties; + @Schema(description = "All child Entries of this Entry.", example = "[1, 2, 3]") List children; diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java index a3fd9d8..3abf3b5 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java @@ -1,7 +1,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; - - +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -14,14 +13,22 @@ import lombok.experimental.FieldDefaults; @NoArgsConstructor @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) +@Schema(description = "Object containing text information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. 
Every text block can only occur on a single page.") public class DocumentTextData { + @Schema(description = "Identifier of the text block.") Long id; + @Schema(description = "The page the text block occurs on.") Long page; + @Schema(description = "The text of the text block.") String searchText; + @Schema(description = "Each text block is assigned a number on a page, starting from 0.") int numberOnPage; + @Schema(description = "The text blocks are ordered, this number represents the start of the text block as a string offset.") int start; + @Schema(description = "The text blocks are ordered, this number represents the end of the text block as a string offset.") int end; + @Schema(description = "The line breaks in the text of this semantic node in string offsets. They are exclusive end. At the end of each semantic node there is an implicit linebreak.", example = "[5, 10]") int[] lineBreaks; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedSectionText.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedSectionText.java index e37f009..f8cc10b 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedSectionText.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedSectionText.java @@ -1,5 +1,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -9,9 +10,12 @@ import lombok.NoArgsConstructor; @Builder @NoArgsConstructor @AllArgsConstructor +@Schema(description = "Object containing a
simplified version, which contains almost exclusively text, of the document structure Section class.") public class SimplifiedSectionText { + @Schema(description = "The number of this Section. This is used to map the simplified section text back to the original Section.") private int sectionNumber; + @Schema(description = "The text in this Section.") private String text; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedText.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedText.java index 6ec78c3..f16d543 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedText.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedText.java @@ -3,6 +3,7 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction; import java.util.ArrayList; import java.util.List; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -12,9 +13,12 @@ import lombok.NoArgsConstructor; @Builder @NoArgsConstructor @AllArgsConstructor +@Schema(description = "Object containing a simplified version, which contains almost exclusively text, of the document structure.") public class SimplifiedText { + @Schema(description = "Number of pages in the entire document.") private int numberOfPages; + @Schema(description = "A List of simplified Sections, which contains almost exclusively text.") private List sectionTexts = new ArrayList<>(); } diff --git 
a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ParagraphData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ParagraphData.java index d4c6251..08c7beb 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ParagraphData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ParagraphData.java @@ -2,20 +2,29 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; import java.util.List; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.Builder; import lombok.Data; @Data @Builder +@Schema(description = "Object containing information about a Paragraph/Headline/Header/Footer.") public class ParagraphData { + @Schema(description = "The text of this Semantic Node, without any linebreaks.", example = "This is some text.") private String text; + @Schema(description = "A list of text ranges in string offsets. Every character in any of the ranges is bold.", example = "[0, 15]") List boldTextBoundaries; + @Schema(description = "A list of text ranges in string offsets. Every character in any of the ranges is italic.", example = "[0, 15]") List italicTextBoundaries; + @Schema(description = "The line breaks in the text of this semantic node in string offsets. They are exclusive end. At the end of each semantic node there is an implicit linebreak.", example = "[5, 10]") List linebreaks; + @Schema(description = "The classification of this Paragraph.", allowableValues = "{paragraph, headline, header, footer}") private String classification; + @Schema(description = "Describes the text orientation of this semantic node. 
Any semantic node only has a single text orientation.", allowableValues = "{ZERO, QUARTER_CIRCLE, HALF_CIRCLE, THREE_QUARTER_CIRCLE}") private String orientation; + @Schema(description = "Describes the text direction in degrees of this semantic node. Any semantic node only has a single text direction.", minimum = "0", maximum = "359") private int textDirection; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/Range.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/Range.java index a978cc0..df06caa 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/Range.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/Range.java @@ -1,5 +1,8 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; +import io.swagger.v3.oas.annotations.media.Schema; + +@Schema(description = "Object specifying the start and end offsets of a text range in string offsets.") public record Range(int start, int end) { } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ResearchDocumentData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ResearchDocumentData.java index 667fdee..f834f14 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ResearchDocumentData.java +++ 
b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/ResearchDocumentData.java @@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; import java.util.List; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -9,8 +10,12 @@ import lombok.Data; @Builder @Data @AllArgsConstructor +@Schema(description = "Object containing a simplified version of the document structure. This simplified form only knows Paragraphs and Tables. The Paragraph Objects might be a Paragraph, Headline, Header or Footer.") public class ResearchDocumentData { + @Schema(description = "File name of the original uploaded file.") String originalFile; + @Schema(description = "A List of all paragraphs/headline or table objects, that have been parsed in this document.") List structureObjects; + } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/RowData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/RowData.java index 388275b..b7afb80 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/RowData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/RowData.java @@ -2,14 +2,19 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; import java.util.List; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Data; @Data @AllArgsConstructor +@Schema(description = "Object containing information about a Table Row.") public class RowData { + @Schema(description = "Boolean 
indicating whether this table row is classified as a header row.") boolean header; + @Schema(description = "A list of Objects containing information about the text in each cell of this row.") List cellText; + @Schema(description = "The bounding box of this table row. Is always exactly 4 values representing x, y, w, h, where x, y specify the lower left corner.") float[] bBox; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java index fca1eff..74eb470 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java @@ -1,5 +1,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -7,13 +8,20 @@ import lombok.Data; @Data @Builder @AllArgsConstructor +@Schema(description = "Object containing information about either a Paragraph/Headline/Header/Footer or a Table.") public class StructureObject { + @Schema(description = "The ID of this StructureObject.") Integer structureObjectNumber; + @Schema(description = "The number of the page this StructureObject is located on.") int page; + @Schema(description = "This stringOffset indicates the start of the string offsets in this Object, with respect to the reading order of the entire document.
It is equal to the previous StructureObject's stringOffset + its length.") int stringOffset; + @Schema(description = "The bounding box of this StructureObject. Is always exactly 4 values representing x, y, w, h, where x, y specify the lower left corner.", example = "[100, 100, 50, 50]") float[] boundingBox; + @Schema(description = "Object containing information about a Paragraph/Headline/Header/Footer. Either this or table is null.") ParagraphData paragraph; + @Schema(description = "Object containing information about a Table. Either this or paragraph is null.") TableData table; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java index 513dae8..ccf9174 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java @@ -2,15 +2,20 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; import java.util.List; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Data; @Data @AllArgsConstructor +@Schema(description = "Object containing information about a Table.") public class TableData { + @Schema(description = "A list of Objects containing information about all rows in this table.") List rowData; + @Schema(description = "Number of columns in this table.") Integer numberOfCols; + @Schema(description = "Number of rows in this table.") Integer numberOfRows; } diff --git
a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java index d3b45dd..e069b9d 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java @@ -13,6 +13,7 @@ public record LayoutParsingRequest( @NonNull String originFileStorageId, Optional tablesFileStorageId, Optional imagesFileStorageId, + @NonNull String structureFileStorageId, String researchDocumentStorageId, @NonNull String textBlockFileStorageId,