RED-9139: add new TableOfContents Node

* rename previous TableOfContents to SectionTree
* add protobuf compile script
This commit is contained in:
Kilian Schuettler 2024-11-08 14:40:54 +01:00
parent 1384584e2f
commit 6e04c15f3d
47 changed files with 9860 additions and 11226 deletions

View File

@ -7,5 +7,5 @@ description = "layoutparser-service-internal-api"
dependencies {
implementation("io.swagger.core.v3:swagger-annotations:2.2.15")
implementation("com.google.protobuf:protobuf-java-util:4.27.1")
api("com.google.protobuf:protobuf-java-util:4.28.3")
}

View File

@ -1,16 +1,14 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto.DocumentStructure;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import java.awt.geom.Rectangle2D;
import java.io.ObjectStreamException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@ -1,193 +1,177 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: LayoutEngine.proto
// Protobuf Java Version: 4.27.1
@SuppressWarnings("all")
// Protobuf Java Version: 4.28.3
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
public final class LayoutEngineProto {
private LayoutEngineProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
LayoutEngineProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
private LayoutEngineProto() {}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code LayoutEngine}
*/
public enum LayoutEngine
implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>ALGORITHM = 0;</code>
*/
ALGORITHM(0),
/**
* <code>AI = 1;</code>
*/
AI(1),
/**
* <code>OUTLINE = 2;</code>
*/
OUTLINE(2),
UNRECOGNIZED(-1),
;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 27,
/* patch= */ 1,
/* suffix= */ "", LayoutEngineProto.class.getName());
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
LayoutEngine.class.getName());
}
/**
* <code>ALGORITHM = 0;</code>
*/
public static final int ALGORITHM_VALUE = 0;
/**
* <code>AI = 1;</code>
*/
public static final int AI_VALUE = 1;
/**
* <code>OUTLINE = 2;</code>
*/
public static final int OUTLINE_VALUE = 2;
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalArgumentException(
"Can't get the number of an unknown enum value.");
}
return value;
}
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions((com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code LayoutEngine}
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
public enum LayoutEngine implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>ALGORITHM = 0;</code>
*/
ALGORITHM(0),
/**
* <code>AI = 1;</code>
*/
AI(1),
/**
* <code>OUTLINE = 2;</code>
*/
OUTLINE(2),
UNRECOGNIZED(-1),
;
@java.lang.Deprecated
public static LayoutEngine valueOf(int value) {
return forNumber(value);
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
*/
public static LayoutEngine forNumber(int value) {
switch (value) {
case 0: return ALGORITHM;
case 1: return AI;
case 2: return OUTLINE;
default: return null;
}
}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 27,
/* patch= */ 1,
/* suffix= */ "", LayoutEngine.class.getName());
}
/**
* <code>ALGORITHM = 0;</code>
*/
public static final int ALGORITHM_VALUE = 0;
/**
* <code>AI = 1;</code>
*/
public static final int AI_VALUE = 1;
/**
* <code>OUTLINE = 2;</code>
*/
public static final int OUTLINE_VALUE = 2;
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new IllegalArgumentException("Can't get the number of an unknown enum value.");
}
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@Deprecated
public static LayoutEngine valueOf(int value) {
return forNumber(value);
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
*/
public static LayoutEngine forNumber(int value) {
switch (value) {
case 0:
return ALGORITHM;
case 1:
return AI;
case 2:
return OUTLINE;
default:
return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine> internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<LayoutEngine> internalValueMap = new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
LayoutEngine> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
public LayoutEngine findValueByNumber(int number) {
return LayoutEngine.forNumber(number);
return LayoutEngine.forNumber(number);
}
};
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new IllegalStateException("Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues()
.get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() {
return LayoutEngineProto.getDescriptor().getEnumTypes()
.get(0);
}
private static final LayoutEngine[] VALUES = values();
public static LayoutEngine valueOf(com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
if (desc.getType() != getDescriptor()) {
throw new IllegalArgumentException("EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private LayoutEngine(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:LayoutEngine)
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalStateException(
"Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.getDescriptor().getEnumTypes().get(0);
}
private static final LayoutEngine[] VALUES = values();
public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
return descriptor;
public static LayoutEngine valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private static com.google.protobuf.Descriptors.FileDescriptor descriptor;
static {
String[] descriptorData = {"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" + "ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002b\006proto3"};
descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[]{});
descriptor.resolveAllFeaturesImmutable();
private LayoutEngine(int value) {
this.value = value;
}
// @@protoc_insertion_point(outer_class_scope)
// @@protoc_insertion_point(enum_scope:LayoutEngine)
}
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" +
"ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002B[\nFcom." +
"knecon.fforesight.service.layoutparser.i" +
"nternal.api.data.redactionB\021LayoutEngine" +
"Protob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -1,274 +1,261 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.util.Locale;
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: NodeType.proto
// Protobuf Java Version: 4.27.1
@SuppressWarnings("all")
// Protobuf Java Version: 4.28.3
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
public final class NodeTypeProto {
private NodeTypeProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
NodeTypeProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
private NodeTypeProto() {}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code NodeType}
*/
public enum NodeType
implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>DOCUMENT = 0;</code>
*/
DOCUMENT(0),
/**
* <code>SECTION = 1;</code>
*/
SECTION(1),
/**
* <code>SUPER_SECTION = 2;</code>
*/
SUPER_SECTION(2),
/**
* <code>HEADLINE = 3;</code>
*/
HEADLINE(3),
/**
* <code>PARAGRAPH = 4;</code>
*/
PARAGRAPH(4),
/**
* <code>TABLE = 5;</code>
*/
TABLE(5),
/**
* <code>TABLE_CELL = 6;</code>
*/
TABLE_CELL(6),
/**
* <code>IMAGE = 7;</code>
*/
IMAGE(7),
/**
* <code>HEADER = 8;</code>
*/
HEADER(8),
/**
* <code>FOOTER = 9;</code>
*/
FOOTER(9),
/**
* <code>TABLE_OF_CONTENTS = 10;</code>
*/
TABLE_OF_CONTENTS(10),
/**
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
*/
TABLE_OF_CONTENTS_ITEM(11),
UNRECOGNIZED(-1),
;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 27,
/* patch= */ 1,
/* suffix= */ "", NodeTypeProto.class.getName());
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
NodeType.class.getName());
}
/**
* <code>DOCUMENT = 0;</code>
*/
public static final int DOCUMENT_VALUE = 0;
/**
* <code>SECTION = 1;</code>
*/
public static final int SECTION_VALUE = 1;
/**
* <code>SUPER_SECTION = 2;</code>
*/
public static final int SUPER_SECTION_VALUE = 2;
/**
* <code>HEADLINE = 3;</code>
*/
public static final int HEADLINE_VALUE = 3;
/**
* <code>PARAGRAPH = 4;</code>
*/
public static final int PARAGRAPH_VALUE = 4;
/**
* <code>TABLE = 5;</code>
*/
public static final int TABLE_VALUE = 5;
/**
* <code>TABLE_CELL = 6;</code>
*/
public static final int TABLE_CELL_VALUE = 6;
/**
* <code>IMAGE = 7;</code>
*/
public static final int IMAGE_VALUE = 7;
/**
* <code>HEADER = 8;</code>
*/
public static final int HEADER_VALUE = 8;
/**
* <code>FOOTER = 9;</code>
*/
public static final int FOOTER_VALUE = 9;
/**
* <code>TABLE_OF_CONTENTS = 10;</code>
*/
public static final int TABLE_OF_CONTENTS_VALUE = 10;
/**
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
*/
public static final int TABLE_OF_CONTENTS_ITEM_VALUE = 11;
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalArgumentException(
"Can't get the number of an unknown enum value.");
}
return value;
}
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions((com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code NodeType}
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
public enum NodeType implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>DOCUMENT = 0;</code>
*/
DOCUMENT(0),
/**
* <code>SECTION = 1;</code>
*/
SECTION(1),
/**
* <code>SUPER_SECTION = 2;</code>
*/
SUPER_SECTION(2),
/**
* <code>HEADLINE = 3;</code>
*/
HEADLINE(3),
/**
* <code>PARAGRAPH = 4;</code>
*/
PARAGRAPH(4),
/**
* <code>TABLE = 5;</code>
*/
TABLE(5),
/**
* <code>TABLE_CELL = 6;</code>
*/
TABLE_CELL(6),
/**
* <code>IMAGE = 7;</code>
*/
IMAGE(7),
/**
* <code>HEADER = 8;</code>
*/
HEADER(8),
/**
* <code>FOOTER = 9;</code>
*/
FOOTER(9),
UNRECOGNIZED(-1),
;
@java.lang.Deprecated
public static NodeType valueOf(int value) {
return forNumber(value);
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
*/
public static NodeType forNumber(int value) {
switch (value) {
case 0: return DOCUMENT;
case 1: return SECTION;
case 2: return SUPER_SECTION;
case 3: return HEADLINE;
case 4: return PARAGRAPH;
case 5: return TABLE;
case 6: return TABLE_CELL;
case 7: return IMAGE;
case 8: return HEADER;
case 9: return FOOTER;
case 10: return TABLE_OF_CONTENTS;
case 11: return TABLE_OF_CONTENTS_ITEM;
default: return null;
}
}
public String toString() {
return this.name().charAt(0) + this.name().substring(1).toLowerCase(Locale.ROOT);
}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 27,
/* patch= */ 1,
/* suffix= */ "", NodeType.class.getName());
}
/**
* <code>DOCUMENT = 0;</code>
*/
public static final int DOCUMENT_VALUE = 0;
/**
* <code>SECTION = 1;</code>
*/
public static final int SECTION_VALUE = 1;
/**
* <code>SUPER_SECTION = 2;</code>
*/
public static final int SUPER_SECTION_VALUE = 2;
/**
* <code>HEADLINE = 3;</code>
*/
public static final int HEADLINE_VALUE = 3;
/**
* <code>PARAGRAPH = 4;</code>
*/
public static final int PARAGRAPH_VALUE = 4;
/**
* <code>TABLE = 5;</code>
*/
public static final int TABLE_VALUE = 5;
/**
* <code>TABLE_CELL = 6;</code>
*/
public static final int TABLE_CELL_VALUE = 6;
/**
* <code>IMAGE = 7;</code>
*/
public static final int IMAGE_VALUE = 7;
/**
* <code>HEADER = 8;</code>
*/
public static final int HEADER_VALUE = 8;
/**
* <code>FOOTER = 9;</code>
*/
public static final int FOOTER_VALUE = 9;
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new IllegalArgumentException("Can't get the number of an unknown enum value.");
}
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@Deprecated
public static NodeType valueOf(int value) {
return forNumber(value);
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
*/
public static NodeType forNumber(int value) {
switch (value) {
case 0:
return DOCUMENT;
case 1:
return SECTION;
case 2:
return SUPER_SECTION;
case 3:
return HEADLINE;
case 4:
return PARAGRAPH;
case 5:
return TABLE;
case 6:
return TABLE_CELL;
case 7:
return IMAGE;
case 8:
return HEADER;
case 9:
return FOOTER;
default:
return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap<NodeType> internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<NodeType> internalValueMap = new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
public static com.google.protobuf.Internal.EnumLiteMap<NodeType>
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
NodeType> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
public NodeType findValueByNumber(int number) {
return NodeType.forNumber(number);
return NodeType.forNumber(number);
}
};
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new IllegalStateException("Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues()
.get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() {
return NodeTypeProto.getDescriptor().getEnumTypes()
.get(0);
}
private static final NodeType[] VALUES = values();
public static NodeType valueOf(com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
if (desc.getType() != getDescriptor()) {
throw new IllegalArgumentException("EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private NodeType(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:NodeType)
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalStateException(
"Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto.getDescriptor().getEnumTypes().get(0);
}
private static final NodeType[] VALUES = values();
public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
return descriptor;
public static NodeType valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private static com.google.protobuf.Descriptors.FileDescriptor descriptor;
static {
String[] descriptorData = {"\n\016NodeType.proto*\223\001\n\010NodeType\022\014\n\010DOCUMEN"
+ "T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H"
+ "EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT"
+ "ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO"
+ "OTER\020\tb\006proto3"};
descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[]{});
descriptor.resolveAllFeaturesImmutable();
private NodeType(int value) {
this.value = value;
}
// @@protoc_insertion_point(outer_class_scope)
// @@protoc_insertion_point(enum_scope:NodeType)
}
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\016NodeType.proto*\306\001\n\010NodeType\022\014\n\010DOCUMEN" +
"T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H" +
"EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT" +
"ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO" +
"OTER\020\t\022\025\n\021TABLE_OF_CONTENTS\020\n\022\032\n\026TABLE_O" +
"F_CONTENTS_ITEM\020\013BW\nFcom.knecon.fforesig" +
"ht.service.layoutparser.internal.api.dat" +
"a.redactionB\rNodeTypeProtob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -1,5 +1,9 @@
syntax = "proto3";
option java_outer_classname = "DocumentPageProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
message AllDocumentPages {
repeated DocumentPage documentPages = 1;

View File

@ -1,5 +1,8 @@
syntax = "proto3";
option java_outer_classname = "DocumentPositionDataProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
message AllDocumentPositionData {
repeated DocumentPositionData documentPositionData = 1;

View File

@ -1,5 +1,9 @@
syntax = "proto3";
option java_outer_classname = "DocumentStructureProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
import "EntryData.proto";
message DocumentStructure {

View File

@ -1,5 +1,8 @@
syntax = "proto3";
option java_outer_classname = "DocumentTextDataProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
message AllDocumentTextData {
repeated DocumentTextData documentTextData = 1;

View File

@ -3,6 +3,9 @@ syntax = "proto3";
import "LayoutEngine.proto";
import "NodeType.proto";
option java_outer_classname = "EntryDataProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
message EntryData {
// Type of the semantic node.
NodeType type = 1;

View File

@ -1,5 +1,6 @@
syntax = "proto3";
option java_outer_classname = "LayoutEngineProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
enum LayoutEngine {
ALGORITHM = 0;
AI = 1;

View File

@ -1,5 +1,8 @@
syntax = "proto3";
option java_outer_classname = "NodeTypeProto";
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
enum NodeType {
DOCUMENT = 0;
SECTION = 1;
@ -11,4 +14,6 @@ enum NodeType {
IMAGE = 7;
HEADER = 8;
FOOTER = 9;
TABLE_OF_CONTENTS = 10;
TABLE_OF_CONTENTS_ITEM = 11;
}

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Minimum required protoc version
MIN_VERSION="28.3"
# Get the installed protoc version (second whitespace-separated field of the
# `protoc --version` output). stderr is discarded so that a missing protoc
# simply yields an empty value here; the dedicated installation check below
# then reports the problem instead of a raw "command not found" shell error.
INSTALLED_VERSION=$(protoc --version 2> /dev/null | awk '{print $2}')
# Function to compare versions.
# Succeeds (exit status 0) only when version $1 is strictly lower than
# version $2; equal versions and a newer $1 both yield a non-zero status.
#   $1 - version under test (e.g. the installed version)
#   $2 - version to compare against (e.g. the required minimum)
version_lt() {
  # Equal versions are never "less than".
  # Otherwise $1 is lower exactly when `sort -V` (version ordering) puts it first.
  [ "$1" != "$2" ] && [ "$(printf '%s\n' "$1" "$2" | sort -V | head -n1)" = "$1" ]
}
# Guard 1: stop right away when no protoc executable can be found on PATH
command -v protoc > /dev/null 2>&1 || {
  echo "Error: protoc is not installed. Please install version $MIN_VERSION or later."
  exit 1
}
# Guard 2: stop when version_lt flags the installed protoc against the minimum
version_lt "$INSTALLED_VERSION" "$MIN_VERSION" && {
  echo "Error: protoc version $INSTALLED_VERSION is too old. Please upgrade to version $MIN_VERSION or later."
  exit 1
}
# Compile every .proto file of the current directory into Java sources under ../java
protoc --java_out=../java ./*.proto

View File

@ -35,6 +35,4 @@ dependencies {
implementation("org.commonmark:commonmark-ext-gfm-tables:0.22.0")
implementation("com.pdftron:PDFNet:10.11.0")
implementation("org.apache.commons:commons-text:1.12.0")
implementation("com.google.protobuf:protobuf-java-util:4.27.1")
}

View File

@ -39,10 +39,9 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineExtractorService;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineValidationService;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TOCEnrichmentService;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
@ -107,7 +106,7 @@ public class LayoutParsingPipeline {
GraphicExtractorService graphicExtractorService;
OutlineExtractorService outlineExtractorService;
OutlineValidationService outlineValidationService;
TOCEnrichmentService tocEnrichmentService;
SectionTreeBuilderService sectionTreeBuilderService;
LayoutparserSettings settings;
ClassificationService classificationService;
@ -345,14 +344,14 @@ public class LayoutParsingPipeline {
classificationService.classify(classificationDocument, layoutParsingType, identifier);
TableOfContents tableOfContents = outlineValidationService.createToC(classificationDocument);
classificationDocument.setTableOfContents(tableOfContents);
SectionTree sectionTree = outlineValidationService.createSectionTree(classificationDocument);
classificationDocument.setSectionTree(sectionTree);
log.info("Building Sections for {}", identifier);
switch (layoutParsingType) {
case CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_PARAGRAPH_DEBUG -> sectionsBuilderService.buildParagraphDebugSections(classificationDocument);
default -> tocEnrichmentService.assignSectionBlocksAndImages(classificationDocument);
default -> sectionTreeBuilderService.assignSectionBlocksAndImages(classificationDocument);
}
return classificationDocument;

View File

@ -4,7 +4,7 @@ import java.util.ArrayList;
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;
@ -31,6 +31,6 @@ public class ClassificationDocument {
private long rulesVersion;
private OutlineObjectTree outlineObjectTree;
private TableOfContents tableOfContents;
private SectionTree sectionTree;
}

View File

@ -14,6 +14,7 @@ public enum PageBlockType {
PARAGRAPH_ITALIC,
PARAGRAPH_UNKNOWN,
OTHER,
TABLE_OF_CONTENTS_HEADLINE,
TABLE_OF_CONTENTS_ITEM,
LIST_ITEM,
TABLE;
@ -35,7 +36,7 @@ public enum PageBlockType {
public static int getHeadlineNumber(PageBlockType pageBlockType) {
return switch (pageBlockType) {
case H1 -> 1;
case H1, TABLE_OF_CONTENTS_HEADLINE -> 1;
case H2 -> 2;
case H3 -> 3;
case H4 -> 4;
@ -47,6 +48,6 @@ public enum PageBlockType {
public boolean isHeadline() {
return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6);
return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6) || this.equals(TABLE_OF_CONTENTS_HEADLINE);
}
}

View File

@ -11,6 +11,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
public abstract class AbstractNodeVisitor implements NodeVisitor {
@ -83,6 +85,18 @@ public abstract class AbstractNodeVisitor implements NodeVisitor {
visitChildren(tableCell);
}
/**
 * Visits a {@link TableOfContents} node. This default implementation only
 * forwards the node to {@code visitChildren}.
 *
 * @param toc the table-of-contents node being visited
 */
@Override
public void visit(TableOfContents toc) {
visitChildren(toc);
}
/**
 * Visits a {@link TableOfContentsItem} node. This default implementation only
 * forwards the node to {@code visitChildren}.
 *
 * @param toci the table-of-contents item being visited
 */
@Override
public void visit(TableOfContentsItem toci) {
visitChildren(toci);
}
protected void visitChildren(SemanticNode semanticNode) {

View File

@ -10,6 +10,10 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;
public interface NodeVisitor {
@ -42,4 +46,10 @@ public interface NodeVisitor {
void visit(TableCell tableCell);
/**
 * Visitor callback for {@link TableOfContents} nodes.
 *
 * @param tableOfContents the node being visited
 */
void visit(TableOfContents tableOfContents);
/**
 * Visitor callback for {@link TableOfContentsItem} nodes.
 *
 * @param tableOfContentsItem the node being visited
 */
void visit(TableOfContentsItem tableOfContentsItem);
}

View File

@ -0,0 +1,41 @@
package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Semantic node reporting the node type {@code TABLE_OF_CONTENTS}.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
public class TableOfContents extends AbstractSemanticNode {

    /**
     * Double-dispatch entry point: hands this node to the matching {@code visit} overload.
     *
     * @param visitor the visitor to call back
     */
    @Override
    public void accept(NodeVisitor visitor) {

        visitor.visit(this);
    }


    /**
     * Resolves the headline of this node.
     *
     * @return the first {@code HEADLINE} child yielded by {@code streamChildrenOfType};
     *         when there is none, the headline reported by the parent node
     */
    public Headline getHeadline() {

        return streamChildrenOfType(NodeTypeProto.NodeType.HEADLINE)
                .findFirst()
                .map(Headline.class::cast)
                .orElseGet(() -> getParent().getHeadline());
    }


    /**
     * @return always {@code NodeType.TABLE_OF_CONTENTS}
     */
    @Override
    public NodeTypeProto.NodeType getType() {

        return NodeTypeProto.NodeType.TABLE_OF_CONTENTS;
    }

}

View File

@ -0,0 +1,51 @@
package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Leaf semantic node whose {@link #getType() type} is {@code TABLE_OF_CONTENTS_ITEM}.
 * <p>
 * The node's text is held directly in {@code leafTextBlock}.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
public class TableOfContentsItem extends AbstractSemanticNode {

    // Text of this item; returned as-is by getTextBlock().
    TextBlock leafTextBlock;


    /**
     * @return always {@code NodeType.TABLE_OF_CONTENTS_ITEM}
     */
    @Override
    public NodeTypeProto.NodeType getType() {

        return NodeTypeProto.NodeType.TABLE_OF_CONTENTS_ITEM;
    }


    /**
     * @return the text block stored on this node
     */
    @Override
    public TextBlock getTextBlock() {

        return this.leafTextBlock;
    }


    /**
     * @return always {@code true}
     */
    @Override
    public boolean isLeaf() {

        return true;
    }


    /**
     * Dispatches to the visitor overload that accepts a {@link TableOfContentsItem}.
     *
     * @param visitor the visitor to call back
     */
    @Override
    public void accept(NodeVisitor visitor) {

        visitor.visit(this);
    }

}

View File

@ -1,5 +1,6 @@
package com.knecon.fforesight.service.layoutparser.processor.model.outline;
import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.TABLE_OF_CONTENTS_HEADLINE;
import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;
import java.util.ArrayList;
@ -21,20 +22,20 @@ import lombok.extern.slf4j.Slf4j;
public class OutlineValidationService {
@Observed(name = "OutlineValidationService", contextualName = "create-toc")
public TableOfContents createToC(ClassificationDocument classificationDocument) {
public SectionTree createSectionTree(ClassificationDocument classificationDocument) {
List<TextPageBlock> headlines = extractHeadlines(classificationDocument);
List<TableOfContentItem> mainSections = new ArrayList<>();
Map<Integer, TableOfContentItem> lastItemsPerDepth = new HashMap<>();
TableOfContentItem last = null;
List<SectionTreeEntry> mainSections = new ArrayList<>();
Map<Integer, SectionTreeEntry> lastItemsPerDepth = new HashMap<>();
SectionTreeEntry last = null;
TreeSet<Integer> depths = new TreeSet<>();
for (TextPageBlock current : headlines) {
int currentDepth = getHeadlineNumber(current.getClassification());
Integer parentDepth = depths.floor(currentDepth - 1);
var tocItem = new TableOfContentItem(current);
var tocItem = new SectionTreeEntry(current);
if (parentDepth == null) {
mainSections.add(tocItem);
@ -44,14 +45,16 @@ public class OutlineValidationService {
} else {
assert last != null;
int lastDepth = getHeadlineNumber(last.getHeadline().getClassification());
if (lastDepth < parentDepth) {
if (last.getHeadline().getClassification().equals(TABLE_OF_CONTENTS_HEADLINE) && !current.getClassification().equals(TABLE_OF_CONTENTS_HEADLINE)) {
// headline after toc should always start a main section
parentDepth = 1;
} else if (lastDepth < parentDepth) {
parentDepth = lastDepth;
} else if (lastDepth == currentDepth && last.getParent() != null) {
parentDepth = getHeadlineNumber(last.getParent().getHeadline().getClassification());
}
TableOfContentItem parent = lastItemsPerDepth.get(parentDepth);
SectionTreeEntry parent = lastItemsPerDepth.get(parentDepth);
parent.addChild(tocItem);
}
@ -60,7 +63,10 @@ public class OutlineValidationService {
depths.add(currentDepth);
}
return new TableOfContents(mainSections);
return new
SectionTree(mainSections);
}

View File

@ -14,12 +14,12 @@ import lombok.RequiredArgsConstructor;
@Data
@RequiredArgsConstructor
public class TableOfContents implements Iterable<TableOfContentItem> {
public class SectionTree implements Iterable<SectionTreeEntry> {
private List<TableOfContentItem> mainSections = new ArrayList<>();
private List<SectionTreeEntry> mainSections = new ArrayList<>();
public TableOfContents(List<TableOfContentItem> mainSections) {
public SectionTree(List<SectionTreeEntry> mainSections) {
this.mainSections = mainSections;
}
@ -28,36 +28,36 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
public List<TextPageBlock> getAllTextPageBlocks() {
List<TextPageBlock> allTextPageBlocks = new ArrayList<>();
for (TableOfContentItem item : mainSections) {
for (SectionTreeEntry item : mainSections) {
collectTextPageBlocks(item, allTextPageBlocks);
}
return allTextPageBlocks;
}
private void collectTextPageBlocks(TableOfContentItem item, List<TextPageBlock> textPageBlocks) {
private void collectTextPageBlocks(SectionTreeEntry item, List<TextPageBlock> textPageBlocks) {
textPageBlocks.add(item.getHeadline());
for (TableOfContentItem child : item.getChildren()) {
for (SectionTreeEntry child : item.getChildren()) {
collectTextPageBlocks(child, textPageBlocks);
}
}
public List<TableOfContentItem> getAllTableOfContentItems() {
public List<SectionTreeEntry> getAllTableOfContentItems() {
List<TableOfContentItem> allItems = new ArrayList<>();
for (TableOfContentItem item : mainSections) {
List<SectionTreeEntry> allItems = new ArrayList<>();
for (SectionTreeEntry item : mainSections) {
collectTableOfContentItems(item, allItems);
}
return allItems;
}
private void collectTableOfContentItems(TableOfContentItem item, List<TableOfContentItem> allItems) {
private void collectTableOfContentItems(SectionTreeEntry item, List<SectionTreeEntry> allItems) {
allItems.add(item);
for (TableOfContentItem child : item.getChildren()) {
for (SectionTreeEntry child : item.getChildren()) {
collectTableOfContentItems(child, allItems);
}
}
@ -65,7 +65,7 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
private boolean containsBlock(TextPageBlock block) {
for (TableOfContentItem existingItem : this.getMainSections()) {
for (SectionTreeEntry existingItem : this.getMainSections()) {
if (existingItem.getHeadline().equals(block) || existingItem.contains(block)) {
return true;
}
@ -74,9 +74,9 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
}
private boolean containsItem(TableOfContentItem tocItem) {
private boolean containsItem(SectionTreeEntry tocItem) {
for (TableOfContentItem existingItem : this.getMainSections()) {
for (SectionTreeEntry existingItem : this.getMainSections()) {
if (existingItem.equals(tocItem) || existingItem.contains(tocItem)) {
return true;
}
@ -86,18 +86,18 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
@Override
public @NonNull Iterator<TableOfContentItem> iterator() {
public @NonNull Iterator<SectionTreeEntry> iterator() {
return new TableOfContentItemIterator(mainSections);
return new SectionTreeEntryIterator(mainSections);
}
private static class TableOfContentItemIterator implements Iterator<TableOfContentItem> {
private static class SectionTreeEntryIterator implements Iterator<SectionTreeEntry> {
private final Stack<Iterator<TableOfContentItem>> stack = new Stack<>();
private final Stack<Iterator<SectionTreeEntry>> stack = new Stack<>();
TableOfContentItemIterator(List<TableOfContentItem> mainSections) {
SectionTreeEntryIterator(List<SectionTreeEntry> mainSections) {
stack.push(mainSections.iterator());
}
@ -112,10 +112,10 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
@Override
public TableOfContentItem next() {
public SectionTreeEntry next() {
ensureStackTopIsCurrent();
TableOfContentItem currentItem = stack.peek().next();
SectionTreeEntry currentItem = stack.peek().next();
if (currentItem.getChildren() != null && !currentItem.getChildren().isEmpty()) {
stack.push(currentItem.getChildren()
.iterator());

View File

@ -23,28 +23,28 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
public class TOCEnrichmentService {
public class SectionTreeBuilderService {
public void assignSectionBlocksAndImages(ClassificationDocument document) {
TableOfContents toc = document.getTableOfContents();
Iterator<TableOfContentItem> iterator = toc.iterator();
TableOfContentItem currentTOCItem = null;
SectionTree toc = document.getSectionTree();
Iterator<SectionTreeEntry> iterator = toc.iterator();
SectionTreeEntry currentTOCItem = null;
if (iterator.hasNext()) {
currentTOCItem = iterator.next();
}
List<AbstractPageBlock> startBlocks = new ArrayList<>();
List<ClassifiedImage> startImages = new ArrayList<>();
TableOfContentItem currentSection = null;
SectionTreeEntry currentSection = null;
boolean foundFirstHeadline = false;
List<ClassificationHeader> headers = new ArrayList<>();
List<ClassificationFooter> footers = new ArrayList<>();
TablePageBlock previousTable = null;
List<TableOfContentItem> lastFoundTOCItems = new ArrayList<>();
List<SectionTreeEntry> lastFoundTOCItems = new ArrayList<>();
for (ClassificationPage page : document.getPages()) {
List<TableOfContentItem> currentPageTOCItems = new ArrayList<>();
List<SectionTreeEntry> currentPageTOCItems = new ArrayList<>();
List<TextPageBlock> header = new ArrayList<>();
List<TextPageBlock> footer = new ArrayList<>();
for (AbstractPageBlock current : page.getTextBlocks()) {
@ -101,7 +101,7 @@ public class TOCEnrichmentService {
Double xMax = null;
Double yMax = null;
for (TableOfContentItem tocItem : lastFoundTOCItems) {
for (SectionTreeEntry tocItem : lastFoundTOCItems) {
var headline = tocItem.getHeadline();
if (headline.getPage() != page.getPageNumber()) {
@ -169,10 +169,10 @@ public class TOCEnrichmentService {
}
if (!startBlocks.isEmpty() || !startImages.isEmpty()) {
TableOfContentItem unassigned = new TableOfContentItem(null);
SectionTreeEntry unassigned = new SectionTreeEntry(null);
unassigned.setSectionBlocks(startBlocks);
unassigned.setImages(startImages);
document.getTableOfContents().getMainSections().add(0, unassigned);
document.getSectionTree().getMainSections().add(0, unassigned);
}
document.setHeaders(headers);
document.setFooters(footers);

View File

@ -5,6 +5,7 @@ import java.util.List;
import java.util.stream.Collectors;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
@ -14,12 +15,18 @@ import lombok.EqualsAndHashCode;
@Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class TableOfContentItem {
public class SectionTreeEntry {
public enum Type {
SECTION,
SUPER_SECTION,
TOC_SECTION
}
@EqualsAndHashCode.Include
private TextPageBlock headline;
private List<TableOfContentItem> children = new ArrayList<>();
private TableOfContentItem parent;
private List<SectionTreeEntry> children = new ArrayList<>();
private SectionTreeEntry parent;
private List<AbstractPageBlock> sectionBlocks = new ArrayList<>();
private List<ClassifiedImage> images = new ArrayList<>();
@ -27,20 +34,32 @@ public class TableOfContentItem {
private GenericSemanticNode section;
public TableOfContentItem(TextPageBlock headline) {
public SectionTreeEntry(TextPageBlock headline) {
this.headline = headline;
}
public void addChild(TableOfContentItem tableOfContentItem) {
public Type getType() {
children.add(tableOfContentItem);
tableOfContentItem.setParent(this);
// An entry may be constructed with a null headline (e.g. the entry that collects the blocks
// preceding the first headline), and a classification may be unset. Both must be tolerated here,
// otherwise resolving the type of such an entry throws a NullPointerException.
if (headline != null && PageBlockType.TABLE_OF_CONTENTS_HEADLINE.equals(headline.getClassification())) {
    return Type.TOC_SECTION;
}
// Without a TOC headline the type depends only on whether the entry has children.
if (children.isEmpty()) {
    return Type.SECTION;
}
return Type.SUPER_SECTION;
}
public TableOfContentItem getSiblingBefore() {
public void addChild(SectionTreeEntry sectionTreeEntry) {
children.add(sectionTreeEntry);
sectionTreeEntry.setParent(this);
}
public SectionTreeEntry getSiblingBefore() {
if (parent != null) {
int index = parent.getChildren().indexOf(this);
@ -52,7 +71,7 @@ public class TableOfContentItem {
}
public TableOfContentItem getSiblingAfter() {
public SectionTreeEntry getSiblingAfter() {
if (parent != null) {
int index = parent.getChildren().indexOf(this);
@ -69,7 +88,7 @@ public class TableOfContentItem {
if (headline.equals(block)) {
return true;
}
for (TableOfContentItem child : children) {
for (SectionTreeEntry child : children) {
if (child.contains(block)) {
return true;
}
@ -78,12 +97,12 @@ public class TableOfContentItem {
}
public boolean contains(TableOfContentItem tocItem) {
public boolean contains(SectionTreeEntry tocItem) {
if (this.equals(tocItem)) {
return true;
}
for (TableOfContentItem child : children) {
for (SectionTreeEntry child : children) {
if (child.contains(tocItem)) {
return true;
}

View File

@ -61,7 +61,7 @@ public class TableOfContentsClassificationService {
if (end > i + 1) {
if (textBlock.textBlock().getClassification() == null) {
textBlock.textBlock().setClassification(PageBlockType.H1);
textBlock.textBlock().setClassification(PageBlockType.TABLE_OF_CONTENTS_HEADLINE);
}
i = end;
}

View File

@ -23,6 +23,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.AbstractSemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
@ -35,10 +36,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Im
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContentItem;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEntry;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
@ -65,7 +67,7 @@ public class DocumentGraphFactory {
document.getPages()
.forEach(context::buildAndAddPageWithCounter);
addSectionsForToC(layoutParsingType, document, context, documentGraph);
addSections(layoutParsingType, document, context, documentGraph);
addHeaderAndFooterToEachPage(document, context);
documentGraph.setNumberOfPages(context.pages.size());
@ -92,18 +94,18 @@ public class DocumentGraphFactory {
}
private void addSectionsForToC(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
GenericSemanticNode parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();
for (SectionTreeEntry sectionTreeEntry : classificationDocument.getSectionTree()) {
GenericSemanticNode parent = sectionTreeEntry.getParent() == null ? null : sectionTreeEntry.getParent().getSection();
Optional<GenericSemanticNode> section = SectionNodeFactory.addSection(layoutParsingType,
parent,
tocItem.getChildren().isEmpty(),
tocItem.getNonEmptySectionBlocks(),
tocItem.getImages(),
sectionTreeEntry.getType(),
sectionTreeEntry.getNonEmptySectionBlocks(),
sectionTreeEntry.getImages(),
context,
document);
tocItem.setSection(section.orElse(null));
sectionTreeEntry.setSection(section.orElse(null));
}
}
@ -121,6 +123,8 @@ public class DocumentGraphFactory {
node = Headline.builder().documentTree(context.getDocumentTree()).build();
} else if (originalTextBlock.isToDuplicate() && layoutParsingType.equals(LayoutParsingType.REDACT_MANAGER)) {
node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree()).build();
} else if (originalTextBlock.getClassification().equals(PageBlockType.TABLE_OF_CONTENTS_ITEM)) {
node = TableOfContentsItem.builder().documentTree(context.getDocumentTree()).build();
} else {
node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
}

View File

@ -17,7 +17,9 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEntry;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.utils.TableMergingUtility;
@ -29,7 +31,7 @@ public class SectionNodeFactory {
public Optional<GenericSemanticNode> addSection(LayoutParsingType layoutParsingType,
GenericSemanticNode parentNode,
boolean isLeaf,
SectionTreeEntry.Type type,
List<AbstractPageBlock> pageBlocks,
List<ClassifiedImage> images,
DocumentGraphFactory.Context context,
@ -48,12 +50,11 @@ public class SectionNodeFactory {
return Optional.empty();
}
AbstractSemanticNode section;
if (isLeaf) {
section = Section.builder().documentTree(context.getDocumentTree()).build();
} else {
section = SuperSection.builder().documentTree(context.getDocumentTree()).build();
}
AbstractSemanticNode section = switch (type) {
case SECTION -> Section.builder().documentTree(context.getDocumentTree()).build();
case SUPER_SECTION -> SuperSection.builder().documentTree(context.getDocumentTree()).build();
case TOC_SECTION -> TableOfContents.builder().documentTree(context.getDocumentTree()).build();
};
context.getSections().add(section);
@ -64,13 +65,14 @@ public class SectionNodeFactory {
if (containsTablesAndTextBlocks) {
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
section,
true,
SectionTreeEntry.Type.SECTION,
subSectionPageBlocks,
emptyList(),
context,
document));
} else if (!isLeaf) {
addSection(layoutParsingType, section, true, pageBlocks, emptyList(), context, document);
} else if (type.equals(SectionTreeEntry.Type.SUPER_SECTION)) {
// If a SuperSection contains more blocks than just a headline, we add a Section which contains the remaining textblocks.
addSection(layoutParsingType, section, SectionTreeEntry.Type.SECTION, pageBlocks, emptyList(), context, document);
} else {
addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section, document);
}

View File

@ -14,6 +14,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEntry;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
@ -120,7 +121,7 @@ public class TableNodeFactory {
} else if (firstTextBlockIsHeadline(cell)) {
SectionNodeFactory.addSection(layoutParsingType,
tableCell,
true,
SectionTreeEntry.Type.SECTION,
cell.getTextBlocks()
.stream()
.map(tb -> (AbstractPageBlock) tb)

View File

@ -12,6 +12,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.AllDocumentPages;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.DocumentPage;
@ -20,7 +21,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Do
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.DocumentPositionData.Position;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;

View File

@ -1,6 +1,5 @@
package com.knecon.fforesight.service.layoutparser.processor.services.mapper;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@ -9,9 +8,9 @@ import java.util.NoSuchElementException;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.DocumentPage;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.AllDocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto.AllDocumentTextData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
@ -26,6 +25,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
@ -70,13 +71,15 @@ public class DocumentGraphMapper {
SemanticNode node = switch (entryData.getType()) {
case SECTION -> buildSection(context);
case SUPER_SECTION -> buildSuperSection(context);
case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
case PARAGRAPH -> buildParagraph(context, entryData.getPropertiesMap());
case HEADLINE -> buildHeadline(context);
case HEADER -> buildHeader(context);
case FOOTER -> buildFooter(context);
case TABLE -> buildTable(context, entryData.getProperties());
case TABLE_CELL -> buildTableCell(context, entryData.getProperties());
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbersList());
case TABLE -> buildTable(context, entryData.getPropertiesMap());
case TABLE_CELL -> buildTableCell(context, entryData.getPropertiesMap());
case IMAGE -> buildImage(context, entryData.getPropertiesMap(), entryData.getPageNumbersList());
case TABLE_OF_CONTENTS -> buildTableOfContents(context);
case TABLE_OF_CONTENTS_ITEM -> buildTableOfContentsItem(context);
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
};
@ -100,6 +103,18 @@ public class DocumentGraphMapper {
}
/**
 * Creates an empty {@link TableOfContents} node attached to the document tree of the given context.
 *
 * @param context mapping context providing the document tree
 * @return the newly built node
 */
private static SemanticNode buildTableOfContents(Context context) {

    final TableOfContents tableOfContents = TableOfContents.builder()
            .documentTree(context.documentTree)
            .build();
    return tableOfContents;
}
/**
 * Creates an empty {@link TableOfContentsItem} node attached to the document tree of the given context.
 *
 * @param context mapping context providing the document tree
 * @return the newly built node
 */
private static SemanticNode buildTableOfContentsItem(Context context) {

    final TableOfContentsItem tableOfContentsItem = TableOfContentsItem.builder()
            .documentTree(context.documentTree)
            .build();
    return tableOfContentsItem;
}
private Headline buildHeadline(Context context) {
return Headline.builder().documentTree(context.documentTree).build();
@ -182,13 +197,11 @@ public class DocumentGraphMapper {
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextDataBlockData.getDocumentTextDataList()
.get(Math.toIntExact(atomicTextBlockId)),
context.atomicPositionBlockData.getDocumentPositionDataList()
.get(Math.toIntExact(atomicTextBlockId)),
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextDataBlockData.getDocumentTextDataList().get(Math.toIntExact(atomicTextBlockId)),
context.atomicPositionBlockData.getDocumentPositionDataList().get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextDataBlockData.getDocumentTextDataList()
.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
getPage(context.documentTextDataBlockData.getDocumentTextDataList().get(Math.toIntExact(atomicTextBlockId)).getPage(),
context));
}

View File

@ -38,6 +38,7 @@ public class LayoutGridService {
layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue);
document.getLayoutDebugLayer().addSentenceVisualization(document.getTextBlock());
document.getLayoutDebugLayer().addOutlineHeadlines(document);
if (document.getLayoutDebugLayer().isActive()) {
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid, document.getLayoutDebugLayer()), outline);
@ -54,12 +55,13 @@ public class LayoutGridService {
.peek(layoutGrid::addTreeId)
.forEach(semanticNode -> {
switch (semanticNode.getType()) {
case SECTION, SUPER_SECTION -> layoutGrid.addSection(semanticNode);
case SECTION, SUPER_SECTION, TABLE_OF_CONTENTS -> layoutGrid.addSection(semanticNode);
case HEADLINE -> layoutGrid.addHeadline((Headline) semanticNode);
case PARAGRAPH -> layoutGrid.addParagraph((Paragraph) semanticNode);
case TABLE -> layoutGrid.addTable((Table) semanticNode);
case IMAGE -> layoutGrid.addImage((Image) semanticNode);
case HEADER, FOOTER -> layoutGrid.addHeaderOrFooter(semanticNode);
case TABLE_OF_CONTENTS_ITEM -> layoutGrid.addTableOfContentsItem(semanticNode);
}
});
return layoutGrid;

View File

@ -111,8 +111,8 @@ public class PdfVisualisationUtility {
return DrawingOptions.builder().stroke(true).strokeColor(switch (entry.getType()) {
case DOCUMENT -> Color.LIGHT_GRAY;
case HEADER, FOOTER -> Color.GREEN;
case PARAGRAPH -> Color.BLUE;
case SUPER_SECTION, SECTION -> Color.BLACK;
case PARAGRAPH, TABLE_OF_CONTENTS_ITEM -> Color.BLUE;
case SUPER_SECTION, SECTION, TABLE_OF_CONTENTS -> Color.BLACK;
case HEADLINE -> Color.RED;
case TABLE -> Color.ORANGE;
case TABLE_CELL -> Color.GRAY;

View File

@ -15,13 +15,17 @@ import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.checkerframework.checker.units.qual.C;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Line;
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
@ -379,4 +383,19 @@ public class LayoutDebugLayer extends LayoutDebugLayerConfig {
}
}
/**
 * Adds a colored rectangle to the outline-headline layer for every headline node of the document
 * whose engines contain {@code OUTLINE}, one rectangle per page of the headline's bounding boxes.
 * <p>
 * Does nothing when this layer is not active.
 *
 * @param document the document whose sub nodes are scanned
 */
public void addOutlineHeadlines(Document document) {

    if (!active) {
        return;
    }

    document.streamAllSubNodes()
            .filter(node -> node.getType().equals(NodeTypeProto.NodeType.HEADLINE)
                            && node.getEngines().contains(LayoutEngineProto.LayoutEngine.OUTLINE))
            .forEach(outlineHeadline -> {
                for (var pageAndBox : outlineHeadline.getBBox().entrySet()) {
                    var rectanglesOnPage = getOrCreateVisualizationsOnPage(pageAndBox.getKey().getNumber(), this.outlineHeadlines).getColoredRectangles();
                    rectanglesOnPage.add(new ColoredRectangle(pageAndBox.getValue(), HEADLINE_COLOR, LINE_WIDTH));
                }
            });
}
}

View File

@ -26,6 +26,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig;
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
@ -72,10 +73,12 @@ public class LayoutGrid extends LayoutGridLayerConfig {
public void addHeadline(Headline headline) {
addAsRectangle(headline, headlines, HEADLINE_COLOR);
if (headline.getEngines().contains(LayoutEngine.OUTLINE)) {
addAsRectangle(headline, outlineHeadlines, HEADLINE_COLOR);
if (headline.getParent().getType().equals(NodeTypeProto.NodeType.TABLE_OF_CONTENTS)) {
addAsRectangle(headline, toc, HEADLINE_COLOR);
} else {
addAsRectangle(headline, headlines, HEADLINE_COLOR);
}
}
@ -88,19 +91,10 @@ public class LayoutGrid extends LayoutGridLayerConfig {
public void addTreeId(SemanticNode semanticNode) {
Page page = semanticNode.getFirstPage();
if (semanticNode.getBBox()
.get(page) == null) {
if (semanticNode.getBBox().get(page) == null) {
return;
}
addPlacedText(page,
semanticNode.getBBox()
.get(page),
semanticNode.getBBox()
.get(page),
buildTreeIdString(semanticNode),
1,
treeIds,
TREEID_COLOR);
addPlacedText(page, semanticNode.getBBox().get(page), semanticNode.getBBox().get(page), buildTreeIdString(semanticNode), 1, treeIds, TREEID_COLOR);
}
@ -124,20 +118,19 @@ public class LayoutGrid extends LayoutGridLayerConfig {
public void addSection(SemanticNode section) {
Map<Page, Rectangle2D> bBoxMap = section.getBBox();
Color color = section.getType().equals(NodeTypeProto.NodeType.TABLE_OF_CONTENTS) ? TOC_COLOR : SECTION_COLOR;
List<SemanticNode> subSections = section.streamAllSubNodesOfType(NodeTypeProto.NodeType.SECTION)
.toList();
Integer maxChildDepth = subSections.stream()
.map(node -> node.getTreeId().size())
.max(Integer::compareTo)
.orElse(section.getTreeId().size());
.max(Integer::compareTo).orElse(section.getTreeId().size());
int ownDepth = section.getTreeId().size();
Page firstPage = section.getFirstPage();
String treeIdString = buildTreeIdString(section);
if (bBoxMap.values().size() == 1) {
handleSinglePage(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth);
handleSinglePage(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth, color);
return;
}
List<Page> pagesInOrder = bBoxMap.keySet()
@ -145,12 +138,12 @@ public class LayoutGrid extends LayoutGridLayerConfig {
.sorted(Comparator.comparingInt(Page::getNumber))
.collect(Collectors.toList());
pagesInOrder.remove(0);
handleFirstPageOfSection(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth);
handleFirstPageOfSection(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth, color);
for (Page middlePage : pagesInOrder.subList(0, pagesInOrder.size() - 1)) {
handleForMiddlePageOfSection(section, middlePage, bBoxMap.get(middlePage), treeIdString, maxChildDepth, ownDepth);
handleForMiddlePageOfSection(section, middlePage, bBoxMap.get(middlePage), treeIdString, maxChildDepth, ownDepth, color);
}
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
handleLastPageOfSection(section, lastPage, bBoxMap.get(lastPage), treeIdString, maxChildDepth, ownDepth);
handleLastPageOfSection(section, lastPage, bBoxMap.get(lastPage), treeIdString, maxChildDepth, ownDepth, color);
}
@ -232,33 +225,45 @@ public class LayoutGrid extends LayoutGridLayerConfig {
}
/**
 * Handles a section whose bounding boxes cover a single page: obtains the lines for the page from
 * {@code createLinesAndPlaceText} and appends every one of them to {@code result.coloredLines()} as a
 * {@code ColoredLine} of width {@code LINE_WIDTH}. The line at index 0 is appended first, then the remaining ones.
 *
 * NOTE(review): two signatures and duplicated {@code SECTION_COLOR} / {@code color} statements appear below.
 * This looks like the pre-change (fixed {@code SECTION_COLOR}) and post-change (caller-supplied {@code color})
 * versions interleaved by a diff — confirm which is current.
 *
 * @param semanticNode  the section node being drawn
 * @param page          the page the section lies on
 * @param rectangle2D   the section's bounding box on that page, as passed by the caller
 * @param treeIdString  the tree-id text forwarded to {@code createLinesAndPlaceText}
 * @param maxChildDepth depth value forwarded to {@code createLinesAndPlaceText}
 * @param ownDepth      depth value forwarded to {@code createLinesAndPlaceText}
 * @param color         line color used by the variant of the signature that declares it
 */
private void handleSinglePage(SemanticNode semanticNode, Page page, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
private void handleSinglePage(SemanticNode semanticNode, Page page, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth, Color color) {
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, page, rectangle2D, treeIdString, maxChildDepth, ownDepth);
// add string to top line
// remove(0) takes the first page line out of the list so the loop below does not add it a second time
var firstLine = result.pageLines().remove(0);
result.coloredLines().add(new ColoredLine(firstLine, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(firstLine, color, LINE_WIDTH));
for (Line2D line : result.pageLines()) {
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
}
}
/**
 * Handles the first page of a section that spans several pages: obtains the page lines from
 * {@code createLinesAndPlaceText}, discards the line at index 2 (the bottom line, per the inline comment), and
 * appends the rest to {@code result.coloredLines()} as {@code ColoredLine}s of width {@code LINE_WIDTH}, starting
 * with the line at index 0.
 *
 * NOTE(review): a one-line signature and a wrapped signature with an extra {@code Color color} parameter both
 * appear below, as do paired {@code SECTION_COLOR} / {@code color} statements. This looks like the pre- and
 * post-change versions interleaved by a diff — confirm which is current.
 *
 * @param semanticNode  the section node being drawn
 * @param firstPage     the first page of the section
 * @param rectangle2D   the section's bounding box on that page, as passed by the caller
 * @param treeIdString  the tree-id text forwarded to {@code createLinesAndPlaceText}
 * @param maxChildDepth depth value forwarded to {@code createLinesAndPlaceText}
 * @param ownDepth      depth value forwarded to {@code createLinesAndPlaceText}
 * @param color         line color used by the variant of the signature that declares it
 */
private void handleFirstPageOfSection(SemanticNode semanticNode, Page firstPage, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
private void handleFirstPageOfSection(SemanticNode semanticNode,
Page firstPage,
Rectangle2D rectangle2D,
String treeIdString,
Integer maxChildDepth,
Integer ownDepth,
Color color) {
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, firstPage, rectangle2D, treeIdString, maxChildDepth, ownDepth);
// remove bottom line
result.pageLines().remove(2);
// add string to top line
// index 0 is evaluated after the removal above; removing it keeps the loop below from adding it twice
var firstLine = result.pageLines().remove(0);
result.coloredLines().add(new ColoredLine(firstLine, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(firstLine, color, LINE_WIDTH));
for (Line2D line : result.pageLines()) {
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
}
}
private void handleForMiddlePageOfSection(SemanticNode semanticNode, Page middlePage, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
private void handleForMiddlePageOfSection(SemanticNode semanticNode,
Page middlePage,
Rectangle2D rectangle2D,
String treeIdString,
Integer maxChildDepth,
Integer ownDepth,
Color color) {
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, middlePage, rectangle2D, treeIdString, maxChildDepth, ownDepth);
// remove top line
@ -267,23 +272,29 @@ public class LayoutGrid extends LayoutGridLayerConfig {
result.pageLines().remove(1);
// add string to left line
var leftLine = result.pageLines().remove(1);
result.coloredLines().add(new ColoredLine(leftLine, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(leftLine, color, LINE_WIDTH));
for (Line2D line : result.pageLines()) {
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
}
}
/**
 * Handles the last page of a section that spans several pages: obtains the page lines from
 * {@code createLinesAndPlaceText}, discards the line at index 0 (the top line, per the inline comment), and
 * appends the rest to {@code result.coloredLines()} as {@code ColoredLine}s of width {@code LINE_WIDTH}, starting
 * with the line that is at index 2 after that removal.
 *
 * NOTE(review): a one-line signature and a wrapped signature with an extra {@code Color color} parameter both
 * appear below, as do paired {@code SECTION_COLOR} / {@code color} statements. This looks like the pre- and
 * post-change versions interleaved by a diff — confirm which is current.
 *
 * @param semanticNode  the section node being drawn
 * @param lastPage      the last page of the section
 * @param rectangle2D   the section's bounding box on that page, as passed by the caller
 * @param treeIdString  the tree-id text forwarded to {@code createLinesAndPlaceText}
 * @param maxChildDepth depth value forwarded to {@code createLinesAndPlaceText}
 * @param ownDepth      depth value forwarded to {@code createLinesAndPlaceText}
 * @param color         line color used by the variant of the signature that declares it
 */
private void handleLastPageOfSection(SemanticNode semanticNode, Page lastPage, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
private void handleLastPageOfSection(SemanticNode semanticNode,
Page lastPage,
Rectangle2D rectangle2D,
String treeIdString,
Integer maxChildDepth,
Integer ownDepth,
Color color) {
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, lastPage, rectangle2D, treeIdString, maxChildDepth, ownDepth);
// remove top line
result.pageLines().remove(0);
// add string to left line
// index 2 is evaluated after the removal above; removing it keeps the loop below from adding it twice
var leftLine = result.pageLines().remove(2);
result.coloredLines().add(new ColoredLine(leftLine, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(leftLine, color, LINE_WIDTH));
for (Line2D line : result.pageLines()) {
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
}
}
@ -295,14 +306,14 @@ public class LayoutGrid extends LayoutGridLayerConfig {
Integer maxChildDepth,
Integer ownDepth) {
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), sections).getColoredLines();
Visualizations visualizations = semanticNode.getType().equals(NodeTypeProto.NodeType.TABLE_OF_CONTENTS) ? toc : sections;
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines();
int lineWidthModifier = maxChildDepth - ownDepth;
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
.get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
SemanticNode highestParent = semanticNode.getHighestParent();
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
addPlacedText(page, rectangle2D, highestParentRect, treeIdString, maxChildDepth, sections, SECTION_COLOR);
addPlacedText(page, rectangle2D, highestParentRect, treeIdString, maxChildDepth, visualizations, SECTION_COLOR);
var lastPageLines = createLinesFromRectangle(r, page.getRotation());
if (semanticNode instanceof SuperSection) {
@ -347,8 +358,7 @@ public class LayoutGrid extends LayoutGridLayerConfig {
List<Double> ys = yStream.collect(Collectors.toList());
ys.remove(0);
Rectangle2D tableBBox = table.getBBox()
.get(page);
Rectangle2D tableBBox = table.getBBox().get(page);
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines();
xs.forEach(x -> {
@ -384,6 +394,12 @@ public class LayoutGrid extends LayoutGridLayerConfig {
}
/**
 * Passes the given node to {@code addAsRectangle} on the {@code toc} layer with {@code PARAGRAPH_COLOR}.
 *
 * @param semanticNode the node to register on the table-of-contents layer; presumably a table-of-contents item,
 *                     going by the method name — the type is not checked here
 */
public void addTableOfContentsItem(SemanticNode semanticNode) {
addAsRectangle(semanticNode, toc, PARAGRAPH_COLOR);
}
/**
 * Value bundle assigned from {@code createLinesAndPlaceText} and consumed by the per-page section handlers.
 *
 * @param coloredLines list to which the handlers append {@code ColoredLine} entries
 * @param rectangle    a rectangle carried along with the lines — NOTE(review): not read in the visible handlers;
 *                     confirm its meaning at the construction site
 * @param pageLines    lines the handlers remove entries from and then iterate over, so the list is expected to
 *                     be modifiable
 */
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
}

View File

@ -45,7 +45,6 @@ dependencies {
// for integration testing only
testImplementation(project(":viewer-doc-processor"))
testImplementation(project(":layoutparser-service-internal-api"))
testImplementation("com.google.protobuf:protobuf-java-util:4.27.1")
testImplementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}")

View File

@ -10,7 +10,6 @@ import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Predicate;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach;
@ -28,7 +27,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
@ -100,10 +99,10 @@ public class OutlineDetectionTest extends AbstractTest {
.flatMap(Collection::stream)
.allMatch(OutlineObject::isFound));
TableOfContents tableOfContents = classificationDocument.getTableOfContents();
SectionTree sectionTree = classificationDocument.getSectionTree();
assertEquals(tableOfContents.getMainSections().size(), 9);
assertEquals(tableOfContents.getMainSections().subList(1, 9)
assertEquals(sectionTree.getMainSections().size(), 9);
assertEquals(sectionTree.getMainSections().subList(1, 9)
.stream()
.map(tableOfContentItem -> sanitizeString(tableOfContentItem.getHeadline().toString()))
.toList(),
@ -121,14 +120,14 @@ public class OutlineDetectionTest extends AbstractTest {
// assertEquals(tableOfContents.getMainSections().get(6).getImages().size(), 1);
// assertEquals(tableOfContents.getMainSections().get(8).getChildren().get(2).getChildren().get(0).getChildren().get(2).getImages().size(), 1);
assertTrue(tableOfContents.getAllTableOfContentItems()
assertTrue(sectionTree.getAllTableOfContentItems()
.stream()
.allMatch(tableOfContentItem -> tableOfContentItem.getSection() != null));
assertTrue(tableOfContents.getAllTableOfContentItems()
assertTrue(sectionTree.getAllTableOfContentItems()
.stream()
.filter(tableOfContentItem -> tableOfContentItem.getChildren().isEmpty())
.allMatch(tableOfContentItem -> tableOfContentItem.getSection() instanceof Section));
assertTrue(tableOfContents.getAllTableOfContentItems()
assertTrue(sectionTree.getAllTableOfContentItems()
.stream()
.filter(tableOfContentItem -> !tableOfContentItem.getChildren().isEmpty())
.allMatch(tableOfContentItem -> tableOfContentItem.getSection() instanceof SuperSection));

View File

@ -150,14 +150,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
assertThat(document.getTableOfContents().getAllTableOfContentItems()
assertThat(document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
.toList()).isNotEmpty();
var tables = document.getTableOfContents().getAllTableOfContentItems()
var tables = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -203,14 +203,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getTableOfContents().getAllTableOfContentItems()
assertThat(document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
.toList()).isNotEmpty();
TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock table = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -233,14 +233,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
"files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getTableOfContents().getAllTableOfContentItems()
assertThat(document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
.toList()).isNotEmpty();
TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock firstTable = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -250,7 +250,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
.get(0);
assertThat(firstTable.getColCount()).isEqualTo(8);
assertThat(firstTable.getRowCount()).isEqualTo(1);
TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock secondTable = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -280,14 +280,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
"files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getTableOfContents().getAllTableOfContentItems()
assertThat(document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
.toList()).isNotEmpty();
TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock firstTable = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -297,7 +297,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
.get(0);
assertThat(firstTable.getColCount()).isEqualTo(9);
assertThat(firstTable.getRowCount()).isEqualTo(5);
TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock secondTable = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -327,14 +327,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
"files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getTableOfContents().getAllTableOfContentItems()
assertThat(document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
.toList()).isNotEmpty();
TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock firstTable = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -344,7 +344,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
.get(0);
assertThat(firstTable.getColCount()).isEqualTo(8);
assertThat(firstTable.getRowCount()).isEqualTo(1);
TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock secondTable = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -844,7 +844,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@SneakyThrows
private void toHtml(ClassificationDocument document, String filename) {
var tables = document.getTableOfContents().getAllTableOfContentItems()
var tables = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -871,7 +871,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {
TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock table = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -901,7 +901,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {
TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
TablePageBlock table = document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()
@ -929,7 +929,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
private void validateTableSize(ClassificationDocument document, int tableSize) {
assertThat(document.getTableOfContents().getAllTableOfContentItems()
assertThat(document.getSectionTree().getAllTableOfContentItems()
.stream()
.flatMap(tocItem -> tocItem.getSectionBlocks()
.stream()

View File

@ -18,8 +18,8 @@ import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto.NodeType;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;

View File

@ -227,9 +227,9 @@ public class PdfDraw {
return DrawingOptions.builder().stroke(true).strokeColor(switch (entry.getType()) {
case DOCUMENT -> Color.LIGHT_GRAY;
case HEADER, FOOTER -> Color.GREEN;
case PARAGRAPH -> Color.BLUE;
case PARAGRAPH, TABLE_OF_CONTENTS_ITEM -> Color.BLUE;
case HEADLINE -> Color.RED;
case SECTION, SUPER_SECTION -> Color.BLACK;
case SECTION, SUPER_SECTION, TABLE_OF_CONTENTS -> Color.BLACK;
case TABLE -> Color.ORANGE;
case TABLE_CELL -> Color.GRAY;
case IMAGE -> Color.MAGENTA;

View File

@ -40,7 +40,7 @@ public record LayerIdentifier(String name, String markedContentName) {
public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES");
public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES");
public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs");
public static final LayerIdentifier OUTLINE_HEADLINES = new LayerIdentifier("Outline Headlines", "OUTLINE_HEADLINES");
public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS");
//layout grid debug
public static final LayerIdentifier KNECON_LAYOUT_DEBUG = new LayerIdentifier("Layout elements", "DEBUG_LAYOUT");
@ -55,6 +55,7 @@ public record LayerIdentifier(String name, String markedContentName) {
public static final LayerIdentifier NEIGHBOURS = new LayerIdentifier("Neighbours", "NEIGHBOURS");
public static final LayerIdentifier CHARACTERS = new LayerIdentifier("Characters", "CHARACTERS");
public static final LayerIdentifier OUTLINE_OBJECTS = new LayerIdentifier("Outline Positions", "OUTLINE_OBJECTS");
public static final LayerIdentifier OUTLINE_HEADLINES = new LayerIdentifier("Outline Headlines", "OUTLINE_HEADLINES");
public static final LayerIdentifier SENTENCES = new LayerIdentifier("Sentences", "SENTENCES");
public static final LayerIdentifier TOC_PAGES = new LayerIdentifier("TOC pages", "TOC_PAGES");
public static final LayerIdentifier TOC_BLOCKS = new LayerIdentifier("TOC blocks", "TOC_BLOCKS");

View File

@ -32,6 +32,8 @@ public class LayoutDebugLayerConfig extends AbstractLayerGroup {
protected static final Color UNDERLINE_RULING_COLOR = new Color(6, 39, 171);
protected static final Color STRIKETROUGH_RULING_COLOR = new Color(171, 6, 6);
protected static final Color HEADLINE_COLOR = new Color(162, 56, 56);
protected static final Color CELLS_COLOR = new Color(31, 214, 27);
protected static final Color OUTLINE_OBJECT_COLOR = new Color(214, 27, 183);
@ -62,7 +64,7 @@ public class LayoutDebugLayerConfig extends AbstractLayerGroup {
protected final Visualizations tocPages = Visualizations.builder().layer(LayerIdentifier.TOC_PAGES).build();
protected final Visualizations tocBlocks = Visualizations.builder().layer(LayerIdentifier.TOC_BLOCKS).build();
protected final Visualizations listIdentifiers = Visualizations.builder().layer(LayerIdentifier.LIST_IDENTIFIERS).build();
protected final Visualizations outlineHeadlines = Visualizations.builder().layer(LayerIdentifier.OUTLINE_HEADLINES).build();
public List<Visualizations> getVisualizations() {
@ -78,6 +80,7 @@ public class LayoutDebugLayerConfig extends AbstractLayerGroup {
mainBody, //
markedContent, //
outlineObjects, //
outlineHeadlines, //
tocPages, //
tocBlocks, //
listIdentifiers //

View File

@ -34,8 +34,10 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
protected static final Color KEY_VALUE_BBOX_COLOR = new Color(0, 39, 85);
protected static final Color KEY_COLOR = new Color(30, 92, 172);
protected static final Color VALUE_COLOR = new Color(30, 172, 146);
protected static final Color TOC_COLOR = new Color(0, 86, 198);
protected final Visualizations sections = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_SECTION).visibleByDefault(true).build();
protected final Visualizations toc = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TOC).visibleByDefault(true).build();
protected final Visualizations paragraphs = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_PARAGRAPH).visibleByDefault(true).build();
protected final Visualizations headlines = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_HEADLINE).visibleByDefault(true).build();
protected final Visualizations tables = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TABLE).visibleByDefault(true).build();
@ -44,12 +46,12 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build();
protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build();
protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build();
protected final Visualizations outlineHeadlines = Visualizations.builder().layer(LayerIdentifier.OUTLINE_HEADLINES).build();
/**
 * Returns the layers of this layer group as an unmodifiable list, in the order listed in the {@code List.of} call.
 *
 * NOTE(review): two {@code return} statements appear below. This looks like the pre-change list (ending in
 * {@code outlineHeadlines}) followed by the post-change list (with {@code toc} after {@code headerFooter} and no
 * {@code outlineHeadlines}), left over from a diff — confirm which one is current.
 */
@Override
public List<Visualizations> getVisualizations() {
return List.of(headlines, paragraphs, tables, sections, headerFooter, keyValue, figures, images, treeIds, outlineHeadlines);
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds);
}
}