TAAS-89: added some more documentation
* fixed bug with empty sections: empty page blocks are now filtered out before the section nodes are created
This commit is contained in:
parent
11ba9c6bb9
commit
261ef4c367
@ -13,9 +13,9 @@ public class TableData {
|
||||
|
||||
@Schema(description = "A list of Objects containing information about all rows in this table.")
|
||||
List<RowData> rowData;
|
||||
@Schema(description = "Numer of columns in this table.")
|
||||
@Schema(description = "Number of columns in this table.")
|
||||
Integer numberOfCols;
|
||||
@Schema(description = "Numer of rows in this table.")
|
||||
@Schema(description = "Number of rows in this table.")
|
||||
Integer numberOfRows;
|
||||
|
||||
}
|
||||
|
||||
@ -2,9 +2,19 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Builder;
|
||||
|
||||
@Builder
|
||||
public record LayoutParsingFinishedEvent(Map<String, String> identifier, long duration, int numberOfPages, String message) {
|
||||
@Schema(description = "Object containing information about the layout parsing.")
|
||||
public record LayoutParsingFinishedEvent(
|
||||
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
|
||||
Map<String, String> identifier,//
|
||||
@Schema(description = "The duration of a single layout parsing in ms.")
|
||||
long duration,//
|
||||
@Schema(description = "The number of pages of the parsed document.")
|
||||
int numberOfPages,//
|
||||
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
|
||||
String message) {
|
||||
|
||||
}
|
||||
|
||||
@ -3,24 +3,42 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Builder;
|
||||
import lombok.NonNull;
|
||||
|
||||
@Builder
|
||||
@Schema(description = "Object containing all storage paths the service needs to know.")
|
||||
public record LayoutParsingRequest(
|
||||
@Schema(description = "Enum specifying the type of layout parsing to be performed.", allowableValues = "{RedactManager, DocuMine, TAAS}")//
|
||||
@NonNull LayoutParsingType layoutParsingType,
|
||||
Map<String, String> identifier,
|
||||
@NonNull String originFileStorageId,
|
||||
Optional<String> tablesFileStorageId,
|
||||
Optional<String> imagesFileStorageId,
|
||||
|
||||
@NonNull String structureFileStorageId,
|
||||
String researchDocumentStorageId,
|
||||
@NonNull String textBlockFileStorageId,
|
||||
@NonNull String positionBlockFileStorageId,
|
||||
@NonNull String pageFileStorageId,
|
||||
@NonNull String simplifiedTextStorageId,
|
||||
@NonNull String viewerDocumentStorageId,
|
||||
@Schema(description = "General purpose identifiers. They are not changed by the service at all and are returned as is in the response queue.")//
|
||||
Map<String, String> identifier,
|
||||
|
||||
@Schema(description = "Path to the original PDF file.")//
|
||||
@NonNull String originFileStorageId,//
|
||||
@Schema(description = "Optional Path to the table extraction file.")//
|
||||
Optional<String> tablesFileStorageId,//
|
||||
@Schema(description = "Optional Path to the image classification file.")//
|
||||
Optional<String> imagesFileStorageId,//
|
||||
|
||||
@Schema(description = "Path where the Document Structure File will be stored.")//
|
||||
@NonNull String structureFileStorageId,//
|
||||
@Schema(description = "Path where the Research Data File will be stored.")//
|
||||
String researchDocumentStorageId,//
|
||||
@Schema(description = "Path where the Document Text File will be stored.")//
|
||||
@NonNull String textBlockFileStorageId,//
|
||||
@Schema(description = "Path where the Document Positions File will be stored.")//
|
||||
@NonNull String positionBlockFileStorageId,//
|
||||
@Schema(description = "Path where the Document Pages File will be stored.")//
|
||||
@NonNull String pageFileStorageId,//
|
||||
@Schema(description = "Path where the Simplified Text File will be stored.")//
|
||||
@NonNull String simplifiedTextStorageId,//
|
||||
@Schema(description = "Path where the Viewer Document PDF will be stored.")//
|
||||
@NonNull String viewerDocumentStorageId,//
|
||||
@Deprecated//
|
||||
@Schema(description = "Path where the Section Grid will be stored.")//
|
||||
@NonNull String sectionGridStorageId) {
|
||||
|
||||
}
|
||||
|
||||
@ -77,4 +77,7 @@ public abstract class AbstractPageBlock {
|
||||
return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reports whether this page block is empty. Being abstract, the definition of emptiness is supplied by each concrete block type.
 *
 * @return {@code true} if the block is empty
 */
public abstract boolean isEmpty();
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
@ -29,4 +30,10 @@ public class ClassificationSection {
|
||||
return tables;
|
||||
}
|
||||
|
||||
|
||||
public List<AbstractPageBlock> getNonEmptyPageBlocks() {
|
||||
|
||||
return pageBlocks.stream().filter(pageBlock -> !pageBlock.isEmpty()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -42,6 +42,12 @@ public class TablePageBlock extends AbstractPageBlock {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
|
||||
return unrotatedColCount == 0 || unrotatedRowCount == 0;
|
||||
}
|
||||
|
||||
public List<List<Cell>> getRows() {
|
||||
|
||||
if (rows == null) {
|
||||
@ -304,6 +310,8 @@ public class TablePageBlock extends AbstractPageBlock {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
public String getTextAsHtml() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
@ -365,4 +365,11 @@ public class TextPageBlock extends AbstractPageBlock {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
|
||||
return sequences.isEmpty();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ public class DocumentGraphFactory {
|
||||
|
||||
private void addSections(ClassificationDocument document, Context context) {
|
||||
|
||||
document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getPageBlocks(), section.getImages(), context));
|
||||
document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context));
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user