diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java index ccf9174..e7250cf 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java @@ -13,9 +13,9 @@ public class TableData { @Schema(description = "A list of Objects containing information about all rows in this table.") List rowData; - @Schema(description = "Numer of columns in this table.") + @Schema(description = "Number of columns in this table.") Integer numberOfCols; - @Schema(description = "Numer of rows in this table.") + @Schema(description = "Number of rows in this table.") Integer numberOfRows; } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java index da1f17a..20253bf 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java @@ -2,9 +2,19 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue; import java.util.Map; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.Builder; @Builder -public record LayoutParsingFinishedEvent(Map identifier, long duration, int numberOfPages, String message) { +@Schema(description = "Object containing information about the layout parsing.") +public record LayoutParsingFinishedEvent( + @Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") + Map identifier,// + @Schema(description = "The duration of a single layout parsing in ms.") + long duration,// + @Schema(description = "The number of pages of the parsed document.") + int numberOfPages,// + @Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") + String message) { } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java index e069b9d..6d4c4c6 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java @@ -3,24 +3,42 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue; import java.util.Map; import java.util.Optional; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.Builder; import lombok.NonNull; @Builder +@Schema(description = "Object containing all storage paths the service needs to know.") public record LayoutParsingRequest( + @Schema(description = "Enum specifying the type of layout parsing to be performed.", allowableValues = "{RedactManager, DocuMine, TAAS}")// @NonNull LayoutParsingType layoutParsingType, - Map identifier, - @NonNull String originFileStorageId, - Optional tablesFileStorageId, - Optional imagesFileStorageId, - @NonNull String structureFileStorageId, - String researchDocumentStorageId, - @NonNull String textBlockFileStorageId, - @NonNull String positionBlockFileStorageId, - @NonNull String pageFileStorageId, - @NonNull String simplifiedTextStorageId, - @NonNull String viewerDocumentStorageId, + @Schema(description = "General purpose identifiers. They are not changed by the service at all and are returned as is in the response queue.")// + Map identifier, + + @Schema(description = "Path to the original PDF file.")// + @NonNull String originFileStorageId,// + @Schema(description = "Optional Path to the table extraction file.")// + Optional tablesFileStorageId,// + @Schema(description = "Optional Path to the image classification file.")// + Optional imagesFileStorageId,// + + @Schema(description = "Path where the Document Structure File will be stored.")// + @NonNull String structureFileStorageId,// + @Schema(description = "Path where the Research Data File will be stored.")// + String researchDocumentStorageId,// + @Schema(description = "Path where the Document Text File will be stored.")// + @NonNull String textBlockFileStorageId,// + @Schema(description = "Path where the Document Positions File will be stored.")// + @NonNull String positionBlockFileStorageId,// + @Schema(description = "Path where the Document Pages File will be stored.")// + @NonNull String pageFileStorageId,// + @Schema(description = "Path where the Simplified Text File will be stored.")// + @NonNull String simplifiedTextStorageId,// + @Schema(description = "Path where the Viewer Document PDF will be stored.")// + @NonNull String viewerDocumentStorageId,// + @Deprecated// + @Schema(description = "Path where the Section Grid will be stored.")// @NonNull String sectionGridStorageId) { } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java index 42ef081..c791d27 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java @@ -77,4 +77,7 @@ public abstract class AbstractPageBlock { return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY(); } + + public abstract boolean isEmpty(); + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationSection.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationSection.java index 7074282..58fea4e 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationSection.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationSection.java @@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock; @@ -29,4 +30,10 @@ public class ClassificationSection { return tables; } + + public List getNonEmptyPageBlocks() { + + return pageBlocks.stream().filter(pageBlock -> !pageBlock.isEmpty()).collect(Collectors.toList()); + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java index 10331fe..a64f715 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java @@ -42,6 +42,12 @@ public class TablePageBlock extends AbstractPageBlock { } + @Override + public boolean isEmpty() { + + return unrotatedColCount == 0 || unrotatedRowCount == 0; + } + public List> getRows() { if (rows == null) { @@ -304,6 +310,8 @@ public class TablePageBlock extends AbstractPageBlock { } + + public String getTextAsHtml() { StringBuilder sb = new StringBuilder(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java index 285efd4..0442af6 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java @@ -365,4 +365,11 @@ public class TextPageBlock extends AbstractPageBlock { } + + @Override + public boolean isEmpty() { + + return sequences.isEmpty(); + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java index 83b2381..6bde310 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java @@ -64,7 +64,7 @@ public class DocumentGraphFactory { private void addSections(ClassificationDocument document, Context context) { - document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getPageBlocks(), section.getImages(), context)); + document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context)); }