TAAS-89: added some more documentation

* fixed weird bug with empty sections (empty page blocks are now filtered out before sections are added to the document graph)
This commit is contained in:
Kilian Schuettler 2023-08-31 10:49:09 +02:00
parent 11ba9c6bb9
commit 261ef4c367
8 changed files with 68 additions and 15 deletions

View File

@ -13,9 +13,9 @@ public class TableData {
@Schema(description = "A list of Objects containing information about all rows in this table.") @Schema(description = "A list of Objects containing information about all rows in this table.")
List<RowData> rowData; List<RowData> rowData;
@Schema(description = "Numer of columns in this table.") @Schema(description = "Number of columns in this table.")
Integer numberOfCols; Integer numberOfCols;
@Schema(description = "Numer of rows in this table.") @Schema(description = "Number of rows in this table.")
Integer numberOfRows; Integer numberOfRows;
} }

View File

@ -2,9 +2,19 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;
import java.util.Map; import java.util.Map;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Builder; import lombok.Builder;
@Builder @Builder
public record LayoutParsingFinishedEvent(Map<String, String> identifier, long duration, int numberOfPages, String message) { @Schema(description = "Object containing information about the layout parsing.")
public record LayoutParsingFinishedEvent(
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
Map<String, String> identifier,//
@Schema(description = "The duration of a single layout parsing in ms.")
long duration,//
@Schema(description = "The number of pages of the parsed document.")
int numberOfPages,//
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
String message) {
} }

View File

@ -3,24 +3,42 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Builder; import lombok.Builder;
import lombok.NonNull; import lombok.NonNull;
@Builder @Builder
@Schema(description = "Object containing all storage paths the service needs to know.")
public record LayoutParsingRequest( public record LayoutParsingRequest(
@Schema(description = "Enum specifying the type of layout parsing to be performed.", allowableValues = "{RedactManager, DocuMine, TAAS}")//
@NonNull LayoutParsingType layoutParsingType, @NonNull LayoutParsingType layoutParsingType,
Map<String, String> identifier,
@NonNull String originFileStorageId,
Optional<String> tablesFileStorageId,
Optional<String> imagesFileStorageId,
@NonNull String structureFileStorageId, @Schema(description = "General purpose identifiers. They are not changed by the service at all and are returned as is in the response queue.")//
String researchDocumentStorageId, Map<String, String> identifier,
@NonNull String textBlockFileStorageId,
@NonNull String positionBlockFileStorageId, @Schema(description = "Path to the original PDF file.")//
@NonNull String pageFileStorageId, @NonNull String originFileStorageId,//
@NonNull String simplifiedTextStorageId, @Schema(description = "Optional Path to the table extraction file.")//
@NonNull String viewerDocumentStorageId, Optional<String> tablesFileStorageId,//
@Schema(description = "Optional Path to the image classification file.")//
Optional<String> imagesFileStorageId,//
@Schema(description = "Path where the Document Structure File will be stored.")//
@NonNull String structureFileStorageId,//
@Schema(description = "Path where the Research Data File will be stored.")//
String researchDocumentStorageId,//
@Schema(description = "Path where the Document Text File will be stored.")//
@NonNull String textBlockFileStorageId,//
@Schema(description = "Path where the Document Positions File will be stored.")//
@NonNull String positionBlockFileStorageId,//
@Schema(description = "Path where the Document Pages File will be stored.")//
@NonNull String pageFileStorageId,//
@Schema(description = "Path where the Simplified Text File will be stored.")//
@NonNull String simplifiedTextStorageId,//
@Schema(description = "Path where the Viewer Document PDF will be stored.")//
@NonNull String viewerDocumentStorageId,//
@Deprecated//
@Schema(description = "Path where the Section Grid will be stored.")//
@NonNull String sectionGridStorageId) { @NonNull String sectionGridStorageId) {
} }

View File

@ -77,4 +77,7 @@ public abstract class AbstractPageBlock {
return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY(); return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY();
} }
/**
 * Reports whether this page block is empty.
 * What counts as empty is decided by each concrete subclass.
 *
 * @return {@code true} if the block is considered empty, {@code false} otherwise
 */
public abstract boolean isEmpty();
} }

View File

@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.Collectors;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
@ -29,4 +30,10 @@ public class ClassificationSection {
return tables; return tables;
} }
/**
 * Collects the page blocks of this section that are not empty.
 *
 * @return a new list holding every element of {@code pageBlocks} whose {@code isEmpty()} returns
 *         {@code false}, in the iteration order of {@code pageBlocks}
 */
public List<AbstractPageBlock> getNonEmptyPageBlocks() {
    List<AbstractPageBlock> nonEmptyBlocks = new ArrayList<>();
    for (AbstractPageBlock block : pageBlocks) {
        if (block.isEmpty()) {
            // Empty blocks are dropped from the result.
            continue;
        }
        nonEmptyBlocks.add(block);
    }
    return nonEmptyBlocks;
}
} }

View File

@ -42,6 +42,12 @@ public class TablePageBlock extends AbstractPageBlock {
} }
/**
 * A table is treated as empty when its unrotated column count or its unrotated row count is zero.
 *
 * @return {@code true} if either count is zero, {@code false} otherwise
 */
@Override
public boolean isEmpty() {
    // The column count is checked first; the row count is only consulted when columns exist.
    if (unrotatedColCount == 0) {
        return true;
    }
    return unrotatedRowCount == 0;
}
public List<List<Cell>> getRows() { public List<List<Cell>> getRows() {
if (rows == null) { if (rows == null) {
@ -304,6 +310,8 @@ public class TablePageBlock extends AbstractPageBlock {
} }
public String getTextAsHtml() { public String getTextAsHtml() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();

View File

@ -365,4 +365,11 @@ public class TextPageBlock extends AbstractPageBlock {
} }
/**
 * A text block is treated as empty when it holds no sequences.
 *
 * @return {@code true} if {@code sequences} is empty, {@code false} otherwise
 */
@Override
public boolean isEmpty() {
    final boolean hasNoSequences = sequences.isEmpty();
    return hasNoSequences;
}
} }

View File

@ -64,7 +64,7 @@ public class DocumentGraphFactory {
private void addSections(ClassificationDocument document, Context context) { private void addSections(ClassificationDocument document, Context context) {
document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getPageBlocks(), section.getImages(), context)); document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context));
} }