TAAS-89: added some more documentation

* fixed weird bug with empty sections
This commit is contained in:
Kilian Schuettler 2023-08-31 10:49:09 +02:00
parent 11ba9c6bb9
commit 261ef4c367
8 changed files with 68 additions and 15 deletions

View File

@ -13,9 +13,9 @@ public class TableData {
@Schema(description = "A list of Objects containing information about all rows in this table.")
List<RowData> rowData;
@Schema(description = "Numer of columns in this table.")
@Schema(description = "Number of columns in this table.")
Integer numberOfCols;
@Schema(description = "Numer of rows in this table.")
@Schema(description = "Number of rows in this table.")
Integer numberOfRows;
}

View File

@ -2,9 +2,19 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;
import java.util.Map;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Builder;
@Builder
public record LayoutParsingFinishedEvent(Map<String, String> identifier, long duration, int numberOfPages, String message) {
@Schema(description = "Object containing information about the layout parsing.")
public record LayoutParsingFinishedEvent(
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
Map<String, String> identifier,//
@Schema(description = "The duration of a single layout parsing in ms.")
long duration,//
@Schema(description = "The number of pages of the parsed document.")
int numberOfPages,//
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
String message) {
}

View File

@ -3,24 +3,42 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;
import java.util.Map;
import java.util.Optional;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Builder;
import lombok.NonNull;
@Builder
@Schema(description = "Object containing all storage paths the service needs to know.")
public record LayoutParsingRequest(
@Schema(description = "Enum specifying the type of layout parsing to be performed.", allowableValues = "{RedactManager, DocuMine, TAAS}")//
@NonNull LayoutParsingType layoutParsingType,
Map<String, String> identifier,
@NonNull String originFileStorageId,
Optional<String> tablesFileStorageId,
Optional<String> imagesFileStorageId,
@NonNull String structureFileStorageId,
String researchDocumentStorageId,
@NonNull String textBlockFileStorageId,
@NonNull String positionBlockFileStorageId,
@NonNull String pageFileStorageId,
@NonNull String simplifiedTextStorageId,
@NonNull String viewerDocumentStorageId,
@Schema(description = "General purpose identifiers. They are not changed by the service at all and are returned as is in the response queue.")//
Map<String, String> identifier,
@Schema(description = "Path to the original PDF file.")//
@NonNull String originFileStorageId,//
@Schema(description = "Optional Path to the table extraction file.")//
Optional<String> tablesFileStorageId,//
@Schema(description = "Optional Path to the image classification file.")//
Optional<String> imagesFileStorageId,//
@Schema(description = "Path where the Document Structure File will be stored.")//
@NonNull String structureFileStorageId,//
@Schema(description = "Path where the Research Data File will be stored.")//
String researchDocumentStorageId,//
@Schema(description = "Path where the Document Text File will be stored.")//
@NonNull String textBlockFileStorageId,//
@Schema(description = "Path where the Document Positions File will be stored.")//
@NonNull String positionBlockFileStorageId,//
@Schema(description = "Path where the Document Pages File will be stored.")//
@NonNull String pageFileStorageId,//
@Schema(description = "Path where the Simplified Text File will be stored.")//
@NonNull String simplifiedTextStorageId,//
@Schema(description = "Path where the Viewer Document PDF will be stored.")//
@NonNull String viewerDocumentStorageId,//
@Deprecated//
@Schema(description = "Path where the Section Grid will be stored.")//
@NonNull String sectionGridStorageId) {
}

View File

@ -77,4 +77,7 @@ public abstract class AbstractPageBlock {
return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY();
}
/**
 * Reports whether this page block is considered empty.
 * The criterion is left to each concrete subclass.
 *
 * @return {@code true} if the block is empty, {@code false} otherwise
 */
public abstract boolean isEmpty();
}

View File

@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
@ -29,4 +30,10 @@ public class ClassificationSection {
return tables;
}
/**
 * Collects the page blocks of this section that are not empty.
 * Blocks are kept in the iteration order of {@code pageBlocks}; a block is dropped when its
 * {@code isEmpty()} returns {@code true}.
 *
 * @return a new list holding only the non-empty page blocks
 */
public List<AbstractPageBlock> getNonEmptyPageBlocks() {
    List<AbstractPageBlock> nonEmptyBlocks = new ArrayList<>();
    for (AbstractPageBlock candidate : pageBlocks) {
        if (candidate.isEmpty()) {
            continue;
        }
        nonEmptyBlocks.add(candidate);
    }
    return nonEmptyBlocks;
}
}

View File

@ -42,6 +42,12 @@ public class TablePageBlock extends AbstractPageBlock {
}
/**
 * A table block is empty when it has no columns or no rows,
 * judged by {@code unrotatedColCount} and {@code unrotatedRowCount}.
 *
 * @return {@code true} if either count is zero
 */
@Override
public boolean isEmpty() {
    // Column count is checked first; the row count is only consulted when columns exist.
    if (unrotatedColCount == 0) {
        return true;
    }
    return unrotatedRowCount == 0;
}
public List<List<Cell>> getRows() {
if (rows == null) {
@ -304,6 +310,8 @@ public class TablePageBlock extends AbstractPageBlock {
}
public String getTextAsHtml() {
StringBuilder sb = new StringBuilder();

View File

@ -365,4 +365,11 @@ public class TextPageBlock extends AbstractPageBlock {
}
/**
 * A text block is empty when its {@code sequences} collection reports itself as empty.
 *
 * @return the result of {@code sequences.isEmpty()}
 */
@Override
public boolean isEmpty() {
return sequences.isEmpty();
}
}

View File

@ -64,7 +64,7 @@ public class DocumentGraphFactory {
/**
 * Registers every section of the document by calling {@code SectionNodeFactory.addSection} with a
 * {@code null} first argument, the section's page blocks, the section's images and the given context.
 *
 * @param document the classified document whose sections are iterated
 * @param context  passed through unchanged to {@code SectionNodeFactory.addSection}
 */
private void addSections(ClassificationDocument document, Context context) {
// NOTE(review): the two calls below differ only in getPageBlocks() vs. getNonEmptyPageBlocks().
// This looks like a diff view showing the replaced line followed by its replacement, so presumably
// only the second call (non-empty blocks only) exists in the committed file — confirm.
document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getPageBlocks(), section.getImages(), context));
document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context));
}