RED-9139: add new TableOfContents Node
* rename previous TableOfContents to SectionTree * add protobuf compile script
This commit is contained in:
parent
1384584e2f
commit
6e04c15f3d
@ -7,5 +7,5 @@ description = "layoutparser-service-internal-api"
|
||||
|
||||
dependencies {
|
||||
implementation("io.swagger.core.v3:swagger-annotations:2.2.15")
|
||||
implementation("com.google.protobuf:protobuf-java-util:4.27.1")
|
||||
api("com.google.protobuf:protobuf-java-util:4.28.3")
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,16 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto.DocumentStructure;
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.ObjectStreamException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,193 +1,177 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// NO CHECKED-IN PROTOBUF GENCODE
|
||||
// source: LayoutEngine.proto
|
||||
// Protobuf Java Version: 4.27.1
|
||||
@SuppressWarnings("all")
|
||||
// Protobuf Java Version: 4.28.3
|
||||
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
public final class LayoutEngineProto {
|
||||
private LayoutEngineProto() {}
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
LayoutEngineProto.class.getName());
|
||||
}
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
}
|
||||
|
||||
private LayoutEngineProto() {}
|
||||
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistry registry) {
|
||||
registerAllExtensions(
|
||||
(com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
/**
|
||||
* Protobuf enum {@code LayoutEngine}
|
||||
*/
|
||||
public enum LayoutEngine
|
||||
implements com.google.protobuf.ProtocolMessageEnum {
|
||||
/**
|
||||
* <code>ALGORITHM = 0;</code>
|
||||
*/
|
||||
ALGORITHM(0),
|
||||
/**
|
||||
* <code>AI = 1;</code>
|
||||
*/
|
||||
AI(1),
|
||||
/**
|
||||
* <code>OUTLINE = 2;</code>
|
||||
*/
|
||||
OUTLINE(2),
|
||||
UNRECOGNIZED(-1),
|
||||
;
|
||||
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 27,
|
||||
/* patch= */ 1,
|
||||
/* suffix= */ "", LayoutEngineProto.class.getName());
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
LayoutEngine.class.getName());
|
||||
}
|
||||
/**
|
||||
* <code>ALGORITHM = 0;</code>
|
||||
*/
|
||||
public static final int ALGORITHM_VALUE = 0;
|
||||
/**
|
||||
* <code>AI = 1;</code>
|
||||
*/
|
||||
public static final int AI_VALUE = 1;
|
||||
/**
|
||||
* <code>OUTLINE = 2;</code>
|
||||
*/
|
||||
public static final int OUTLINE_VALUE = 2;
|
||||
|
||||
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
|
||||
public final int getNumber() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"Can't get the number of an unknown enum value.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {
|
||||
|
||||
registerAllExtensions((com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Protobuf enum {@code LayoutEngine}
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
* @deprecated Use {@link #forNumber(int)} instead.
|
||||
*/
|
||||
public enum LayoutEngine implements com.google.protobuf.ProtocolMessageEnum {
|
||||
/**
|
||||
* <code>ALGORITHM = 0;</code>
|
||||
*/
|
||||
ALGORITHM(0),
|
||||
/**
|
||||
* <code>AI = 1;</code>
|
||||
*/
|
||||
AI(1),
|
||||
/**
|
||||
* <code>OUTLINE = 2;</code>
|
||||
*/
|
||||
OUTLINE(2),
|
||||
UNRECOGNIZED(-1),
|
||||
;
|
||||
@java.lang.Deprecated
|
||||
public static LayoutEngine valueOf(int value) {
|
||||
return forNumber(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
*/
|
||||
public static LayoutEngine forNumber(int value) {
|
||||
switch (value) {
|
||||
case 0: return ALGORITHM;
|
||||
case 1: return AI;
|
||||
case 2: return OUTLINE;
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 27,
|
||||
/* patch= */ 1,
|
||||
/* suffix= */ "", LayoutEngine.class.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* <code>ALGORITHM = 0;</code>
|
||||
*/
|
||||
public static final int ALGORITHM_VALUE = 0;
|
||||
/**
|
||||
* <code>AI = 1;</code>
|
||||
*/
|
||||
public static final int AI_VALUE = 1;
|
||||
/**
|
||||
* <code>OUTLINE = 2;</code>
|
||||
*/
|
||||
public static final int OUTLINE_VALUE = 2;
|
||||
|
||||
|
||||
public final int getNumber() {
|
||||
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new IllegalArgumentException("Can't get the number of an unknown enum value.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
* @deprecated Use {@link #forNumber(int)} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static LayoutEngine valueOf(int value) {
|
||||
|
||||
return forNumber(value);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
*/
|
||||
public static LayoutEngine forNumber(int value) {
|
||||
|
||||
switch (value) {
|
||||
case 0:
|
||||
return ALGORITHM;
|
||||
case 1:
|
||||
return AI;
|
||||
case 2:
|
||||
return OUTLINE;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine> internalGetValueMap() {
|
||||
|
||||
return internalValueMap;
|
||||
}
|
||||
|
||||
|
||||
private static final com.google.protobuf.Internal.EnumLiteMap<LayoutEngine> internalValueMap = new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
|
||||
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>
|
||||
internalGetValueMap() {
|
||||
return internalValueMap;
|
||||
}
|
||||
private static final com.google.protobuf.Internal.EnumLiteMap<
|
||||
LayoutEngine> internalValueMap =
|
||||
new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
|
||||
public LayoutEngine findValueByNumber(int number) {
|
||||
|
||||
return LayoutEngine.forNumber(number);
|
||||
return LayoutEngine.forNumber(number);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
public final com.google.protobuf.Descriptors.EnumValueDescriptor getValueDescriptor() {
|
||||
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new IllegalStateException("Can't get the descriptor of an unrecognized enum value.");
|
||||
}
|
||||
return getDescriptor().getValues()
|
||||
.get(ordinal());
|
||||
}
|
||||
|
||||
|
||||
public final com.google.protobuf.Descriptors.EnumDescriptor getDescriptorForType() {
|
||||
|
||||
return getDescriptor();
|
||||
}
|
||||
|
||||
|
||||
public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() {
|
||||
|
||||
return LayoutEngineProto.getDescriptor().getEnumTypes()
|
||||
.get(0);
|
||||
}
|
||||
|
||||
|
||||
private static final LayoutEngine[] VALUES = values();
|
||||
|
||||
|
||||
public static LayoutEngine valueOf(com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
|
||||
|
||||
if (desc.getType() != getDescriptor()) {
|
||||
throw new IllegalArgumentException("EnumValueDescriptor is not for this type.");
|
||||
}
|
||||
if (desc.getIndex() == -1) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
return VALUES[desc.getIndex()];
|
||||
}
|
||||
|
||||
|
||||
private final int value;
|
||||
|
||||
|
||||
private LayoutEngine(int value) {
|
||||
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(enum_scope:LayoutEngine)
|
||||
public final com.google.protobuf.Descriptors.EnumValueDescriptor
|
||||
getValueDescriptor() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalStateException(
|
||||
"Can't get the descriptor of an unrecognized enum value.");
|
||||
}
|
||||
return getDescriptor().getValues().get(ordinal());
|
||||
}
|
||||
public final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptorForType() {
|
||||
return getDescriptor();
|
||||
}
|
||||
public static final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptor() {
|
||||
return com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.getDescriptor().getEnumTypes().get(0);
|
||||
}
|
||||
|
||||
private static final LayoutEngine[] VALUES = values();
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
|
||||
|
||||
return descriptor;
|
||||
public static LayoutEngine valueOf(
|
||||
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
|
||||
if (desc.getType() != getDescriptor()) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"EnumValueDescriptor is not for this type.");
|
||||
}
|
||||
if (desc.getIndex() == -1) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
return VALUES[desc.getIndex()];
|
||||
}
|
||||
|
||||
private final int value;
|
||||
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor descriptor;
|
||||
|
||||
static {
|
||||
String[] descriptorData = {"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" + "ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002b\006proto3"};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[]{});
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
private LayoutEngine(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
// @@protoc_insertion_point(enum_scope:LayoutEngine)
|
||||
}
|
||||
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor
|
||||
getDescriptor() {
|
||||
return descriptor;
|
||||
}
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor
|
||||
descriptor;
|
||||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" +
|
||||
"ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002B[\nFcom." +
|
||||
"knecon.fforesight.service.layoutparser.i" +
|
||||
"nternal.api.data.redactionB\021LayoutEngine" +
|
||||
"Protob\006proto3"
|
||||
};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor
|
||||
.internalBuildGeneratedFileFrom(descriptorData,
|
||||
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
||||
});
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
}
|
||||
|
||||
@ -1,274 +1,261 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
import java.util.Locale;
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// NO CHECKED-IN PROTOBUF GENCODE
|
||||
// source: NodeType.proto
|
||||
// Protobuf Java Version: 4.27.1
|
||||
@SuppressWarnings("all")
|
||||
// Protobuf Java Version: 4.28.3
|
||||
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
public final class NodeTypeProto {
|
||||
private NodeTypeProto() {}
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
NodeTypeProto.class.getName());
|
||||
}
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
}
|
||||
|
||||
private NodeTypeProto() {}
|
||||
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistry registry) {
|
||||
registerAllExtensions(
|
||||
(com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
/**
|
||||
* Protobuf enum {@code NodeType}
|
||||
*/
|
||||
public enum NodeType
|
||||
implements com.google.protobuf.ProtocolMessageEnum {
|
||||
/**
|
||||
* <code>DOCUMENT = 0;</code>
|
||||
*/
|
||||
DOCUMENT(0),
|
||||
/**
|
||||
* <code>SECTION = 1;</code>
|
||||
*/
|
||||
SECTION(1),
|
||||
/**
|
||||
* <code>SUPER_SECTION = 2;</code>
|
||||
*/
|
||||
SUPER_SECTION(2),
|
||||
/**
|
||||
* <code>HEADLINE = 3;</code>
|
||||
*/
|
||||
HEADLINE(3),
|
||||
/**
|
||||
* <code>PARAGRAPH = 4;</code>
|
||||
*/
|
||||
PARAGRAPH(4),
|
||||
/**
|
||||
* <code>TABLE = 5;</code>
|
||||
*/
|
||||
TABLE(5),
|
||||
/**
|
||||
* <code>TABLE_CELL = 6;</code>
|
||||
*/
|
||||
TABLE_CELL(6),
|
||||
/**
|
||||
* <code>IMAGE = 7;</code>
|
||||
*/
|
||||
IMAGE(7),
|
||||
/**
|
||||
* <code>HEADER = 8;</code>
|
||||
*/
|
||||
HEADER(8),
|
||||
/**
|
||||
* <code>FOOTER = 9;</code>
|
||||
*/
|
||||
FOOTER(9),
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS = 10;</code>
|
||||
*/
|
||||
TABLE_OF_CONTENTS(10),
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
|
||||
*/
|
||||
TABLE_OF_CONTENTS_ITEM(11),
|
||||
UNRECOGNIZED(-1),
|
||||
;
|
||||
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 27,
|
||||
/* patch= */ 1,
|
||||
/* suffix= */ "", NodeTypeProto.class.getName());
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
NodeType.class.getName());
|
||||
}
|
||||
/**
|
||||
* <code>DOCUMENT = 0;</code>
|
||||
*/
|
||||
public static final int DOCUMENT_VALUE = 0;
|
||||
/**
|
||||
* <code>SECTION = 1;</code>
|
||||
*/
|
||||
public static final int SECTION_VALUE = 1;
|
||||
/**
|
||||
* <code>SUPER_SECTION = 2;</code>
|
||||
*/
|
||||
public static final int SUPER_SECTION_VALUE = 2;
|
||||
/**
|
||||
* <code>HEADLINE = 3;</code>
|
||||
*/
|
||||
public static final int HEADLINE_VALUE = 3;
|
||||
/**
|
||||
* <code>PARAGRAPH = 4;</code>
|
||||
*/
|
||||
public static final int PARAGRAPH_VALUE = 4;
|
||||
/**
|
||||
* <code>TABLE = 5;</code>
|
||||
*/
|
||||
public static final int TABLE_VALUE = 5;
|
||||
/**
|
||||
* <code>TABLE_CELL = 6;</code>
|
||||
*/
|
||||
public static final int TABLE_CELL_VALUE = 6;
|
||||
/**
|
||||
* <code>IMAGE = 7;</code>
|
||||
*/
|
||||
public static final int IMAGE_VALUE = 7;
|
||||
/**
|
||||
* <code>HEADER = 8;</code>
|
||||
*/
|
||||
public static final int HEADER_VALUE = 8;
|
||||
/**
|
||||
* <code>FOOTER = 9;</code>
|
||||
*/
|
||||
public static final int FOOTER_VALUE = 9;
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS = 10;</code>
|
||||
*/
|
||||
public static final int TABLE_OF_CONTENTS_VALUE = 10;
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
|
||||
*/
|
||||
public static final int TABLE_OF_CONTENTS_ITEM_VALUE = 11;
|
||||
|
||||
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
|
||||
public final int getNumber() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"Can't get the number of an unknown enum value.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {
|
||||
|
||||
registerAllExtensions((com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Protobuf enum {@code NodeType}
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
* @deprecated Use {@link #forNumber(int)} instead.
|
||||
*/
|
||||
public enum NodeType implements com.google.protobuf.ProtocolMessageEnum {
|
||||
/**
|
||||
* <code>DOCUMENT = 0;</code>
|
||||
*/
|
||||
DOCUMENT(0),
|
||||
/**
|
||||
* <code>SECTION = 1;</code>
|
||||
*/
|
||||
SECTION(1),
|
||||
/**
|
||||
* <code>SUPER_SECTION = 2;</code>
|
||||
*/
|
||||
SUPER_SECTION(2),
|
||||
/**
|
||||
* <code>HEADLINE = 3;</code>
|
||||
*/
|
||||
HEADLINE(3),
|
||||
/**
|
||||
* <code>PARAGRAPH = 4;</code>
|
||||
*/
|
||||
PARAGRAPH(4),
|
||||
/**
|
||||
* <code>TABLE = 5;</code>
|
||||
*/
|
||||
TABLE(5),
|
||||
/**
|
||||
* <code>TABLE_CELL = 6;</code>
|
||||
*/
|
||||
TABLE_CELL(6),
|
||||
/**
|
||||
* <code>IMAGE = 7;</code>
|
||||
*/
|
||||
IMAGE(7),
|
||||
/**
|
||||
* <code>HEADER = 8;</code>
|
||||
*/
|
||||
HEADER(8),
|
||||
/**
|
||||
* <code>FOOTER = 9;</code>
|
||||
*/
|
||||
FOOTER(9),
|
||||
UNRECOGNIZED(-1),
|
||||
;
|
||||
@java.lang.Deprecated
|
||||
public static NodeType valueOf(int value) {
|
||||
return forNumber(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
*/
|
||||
public static NodeType forNumber(int value) {
|
||||
switch (value) {
|
||||
case 0: return DOCUMENT;
|
||||
case 1: return SECTION;
|
||||
case 2: return SUPER_SECTION;
|
||||
case 3: return HEADLINE;
|
||||
case 4: return PARAGRAPH;
|
||||
case 5: return TABLE;
|
||||
case 6: return TABLE_CELL;
|
||||
case 7: return IMAGE;
|
||||
case 8: return HEADER;
|
||||
case 9: return FOOTER;
|
||||
case 10: return TABLE_OF_CONTENTS;
|
||||
case 11: return TABLE_OF_CONTENTS_ITEM;
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
||||
return this.name().charAt(0) + this.name().substring(1).toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 27,
|
||||
/* patch= */ 1,
|
||||
/* suffix= */ "", NodeType.class.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* <code>DOCUMENT = 0;</code>
|
||||
*/
|
||||
public static final int DOCUMENT_VALUE = 0;
|
||||
/**
|
||||
* <code>SECTION = 1;</code>
|
||||
*/
|
||||
public static final int SECTION_VALUE = 1;
|
||||
/**
|
||||
* <code>SUPER_SECTION = 2;</code>
|
||||
*/
|
||||
public static final int SUPER_SECTION_VALUE = 2;
|
||||
/**
|
||||
* <code>HEADLINE = 3;</code>
|
||||
*/
|
||||
public static final int HEADLINE_VALUE = 3;
|
||||
/**
|
||||
* <code>PARAGRAPH = 4;</code>
|
||||
*/
|
||||
public static final int PARAGRAPH_VALUE = 4;
|
||||
/**
|
||||
* <code>TABLE = 5;</code>
|
||||
*/
|
||||
public static final int TABLE_VALUE = 5;
|
||||
/**
|
||||
* <code>TABLE_CELL = 6;</code>
|
||||
*/
|
||||
public static final int TABLE_CELL_VALUE = 6;
|
||||
/**
|
||||
* <code>IMAGE = 7;</code>
|
||||
*/
|
||||
public static final int IMAGE_VALUE = 7;
|
||||
/**
|
||||
* <code>HEADER = 8;</code>
|
||||
*/
|
||||
public static final int HEADER_VALUE = 8;
|
||||
/**
|
||||
* <code>FOOTER = 9;</code>
|
||||
*/
|
||||
public static final int FOOTER_VALUE = 9;
|
||||
|
||||
|
||||
public final int getNumber() {
|
||||
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new IllegalArgumentException("Can't get the number of an unknown enum value.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
* @deprecated Use {@link #forNumber(int)} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static NodeType valueOf(int value) {
|
||||
|
||||
return forNumber(value);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
*/
|
||||
public static NodeType forNumber(int value) {
|
||||
|
||||
switch (value) {
|
||||
case 0:
|
||||
return DOCUMENT;
|
||||
case 1:
|
||||
return SECTION;
|
||||
case 2:
|
||||
return SUPER_SECTION;
|
||||
case 3:
|
||||
return HEADLINE;
|
||||
case 4:
|
||||
return PARAGRAPH;
|
||||
case 5:
|
||||
return TABLE;
|
||||
case 6:
|
||||
return TABLE_CELL;
|
||||
case 7:
|
||||
return IMAGE;
|
||||
case 8:
|
||||
return HEADER;
|
||||
case 9:
|
||||
return FOOTER;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static com.google.protobuf.Internal.EnumLiteMap<NodeType> internalGetValueMap() {
|
||||
|
||||
return internalValueMap;
|
||||
}
|
||||
|
||||
|
||||
private static final com.google.protobuf.Internal.EnumLiteMap<NodeType> internalValueMap = new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
|
||||
public static com.google.protobuf.Internal.EnumLiteMap<NodeType>
|
||||
internalGetValueMap() {
|
||||
return internalValueMap;
|
||||
}
|
||||
private static final com.google.protobuf.Internal.EnumLiteMap<
|
||||
NodeType> internalValueMap =
|
||||
new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
|
||||
public NodeType findValueByNumber(int number) {
|
||||
|
||||
return NodeType.forNumber(number);
|
||||
return NodeType.forNumber(number);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
public final com.google.protobuf.Descriptors.EnumValueDescriptor getValueDescriptor() {
|
||||
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new IllegalStateException("Can't get the descriptor of an unrecognized enum value.");
|
||||
}
|
||||
return getDescriptor().getValues()
|
||||
.get(ordinal());
|
||||
}
|
||||
|
||||
|
||||
public final com.google.protobuf.Descriptors.EnumDescriptor getDescriptorForType() {
|
||||
|
||||
return getDescriptor();
|
||||
}
|
||||
|
||||
|
||||
public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() {
|
||||
|
||||
return NodeTypeProto.getDescriptor().getEnumTypes()
|
||||
.get(0);
|
||||
}
|
||||
|
||||
|
||||
private static final NodeType[] VALUES = values();
|
||||
|
||||
|
||||
public static NodeType valueOf(com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
|
||||
|
||||
if (desc.getType() != getDescriptor()) {
|
||||
throw new IllegalArgumentException("EnumValueDescriptor is not for this type.");
|
||||
}
|
||||
if (desc.getIndex() == -1) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
return VALUES[desc.getIndex()];
|
||||
}
|
||||
|
||||
|
||||
private final int value;
|
||||
|
||||
|
||||
private NodeType(int value) {
|
||||
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(enum_scope:NodeType)
|
||||
public final com.google.protobuf.Descriptors.EnumValueDescriptor
|
||||
getValueDescriptor() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalStateException(
|
||||
"Can't get the descriptor of an unrecognized enum value.");
|
||||
}
|
||||
return getDescriptor().getValues().get(ordinal());
|
||||
}
|
||||
public final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptorForType() {
|
||||
return getDescriptor();
|
||||
}
|
||||
public static final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptor() {
|
||||
return com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto.getDescriptor().getEnumTypes().get(0);
|
||||
}
|
||||
|
||||
private static final NodeType[] VALUES = values();
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
|
||||
|
||||
return descriptor;
|
||||
public static NodeType valueOf(
|
||||
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
|
||||
if (desc.getType() != getDescriptor()) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"EnumValueDescriptor is not for this type.");
|
||||
}
|
||||
if (desc.getIndex() == -1) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
return VALUES[desc.getIndex()];
|
||||
}
|
||||
|
||||
private final int value;
|
||||
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor descriptor;
|
||||
|
||||
static {
|
||||
String[] descriptorData = {"\n\016NodeType.proto*\223\001\n\010NodeType\022\014\n\010DOCUMEN"
|
||||
+ "T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H"
|
||||
+ "EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT"
|
||||
+ "ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO"
|
||||
+ "OTER\020\tb\006proto3"};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[]{});
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
private NodeType(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
// @@protoc_insertion_point(enum_scope:NodeType)
|
||||
}
|
||||
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor
|
||||
getDescriptor() {
|
||||
return descriptor;
|
||||
}
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor
|
||||
descriptor;
|
||||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\016NodeType.proto*\306\001\n\010NodeType\022\014\n\010DOCUMEN" +
|
||||
"T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H" +
|
||||
"EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT" +
|
||||
"ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO" +
|
||||
"OTER\020\t\022\025\n\021TABLE_OF_CONTENTS\020\n\022\032\n\026TABLE_O" +
|
||||
"F_CONTENTS_ITEM\020\013BW\nFcom.knecon.fforesig" +
|
||||
"ht.service.layoutparser.internal.api.dat" +
|
||||
"a.redactionB\rNodeTypeProtob\006proto3"
|
||||
};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor
|
||||
.internalBuildGeneratedFileFrom(descriptorData,
|
||||
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
||||
});
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
}
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentPageProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
|
||||
|
||||
message AllDocumentPages {
|
||||
|
||||
repeated DocumentPage documentPages = 1;
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentPositionDataProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
|
||||
message AllDocumentPositionData {
|
||||
|
||||
repeated DocumentPositionData documentPositionData = 1;
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentStructureProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
|
||||
|
||||
import "EntryData.proto";
|
||||
|
||||
message DocumentStructure {
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentTextDataProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
|
||||
message AllDocumentTextData {
|
||||
|
||||
repeated DocumentTextData documentTextData = 1;
|
||||
|
||||
@ -3,6 +3,9 @@ syntax = "proto3";
|
||||
import "LayoutEngine.proto";
|
||||
import "NodeType.proto";
|
||||
|
||||
option java_outer_classname = "EntryDataProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
|
||||
message EntryData {
|
||||
// Type of the semantic node.
|
||||
NodeType type = 1;
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "LayoutEngineProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
enum LayoutEngine {
|
||||
ALGORITHM = 0;
|
||||
AI = 1;
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "NodeTypeProto";
|
||||
option java_package = "com.knecon.fforesight.service.layoutparser.internal.api.data.redaction";
|
||||
|
||||
enum NodeType {
|
||||
DOCUMENT = 0;
|
||||
SECTION = 1;
|
||||
@ -11,4 +14,6 @@ enum NodeType {
|
||||
IMAGE = 7;
|
||||
HEADER = 8;
|
||||
FOOTER = 9;
|
||||
TABLE_OF_CONTENTS = 10;
|
||||
TABLE_OF_CONTENTS_ITEM = 11;
|
||||
}
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Minimum required protoc version
|
||||
MIN_VERSION="28.3"
|
||||
|
||||
# Get the installed protoc version
|
||||
INSTALLED_VERSION=$(protoc --version | awk '{print $2}')
|
||||
|
||||
# Compare two dotted version strings.
# Arguments: $1 - version under test, $2 - reference version.
# Returns success (0) only when $1 is strictly lower than $2.
# `sort -V` orders the two values as versions; if the smallest one is $1
# and the two are not equal, then $1 < $2.
version_lt() {
    [ "$1" != "$2" ] && [ "$(printf '%s\n' "$1" "$2" | sort -V | head -n1)" = "$1" ]
}
|
||||
|
||||
# Guard 1: abort when no protoc executable can be found on PATH.
command -v protoc > /dev/null 2>&1 || {
    echo "Error: protoc is not installed. Please install version $MIN_VERSION or later."
    exit 1
}

# Guard 2: abort when version_lt reports the installed version against the minimum.
version_lt "$INSTALLED_VERSION" "$MIN_VERSION" && {
    echo "Error: protoc version $INSTALLED_VERSION is too old. Please upgrade to version $MIN_VERSION or later."
    exit 1
}
|
||||
|
||||
# Generate Java files from proto files
|
||||
protoc --java_out=../java ./*.proto
|
||||
@ -35,6 +35,4 @@ dependencies {
|
||||
implementation("org.commonmark:commonmark-ext-gfm-tables:0.22.0")
|
||||
implementation("com.pdftron:PDFNet:10.11.0")
|
||||
implementation("org.apache.commons:commons-text:1.12.0")
|
||||
|
||||
implementation("com.google.protobuf:protobuf-java-util:4.27.1")
|
||||
}
|
||||
|
||||
@ -39,10 +39,9 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineExtractorService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineValidationService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TOCEnrichmentService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
@ -107,7 +106,7 @@ public class LayoutParsingPipeline {
|
||||
GraphicExtractorService graphicExtractorService;
|
||||
OutlineExtractorService outlineExtractorService;
|
||||
OutlineValidationService outlineValidationService;
|
||||
TOCEnrichmentService tocEnrichmentService;
|
||||
SectionTreeBuilderService sectionTreeBuilderService;
|
||||
LayoutparserSettings settings;
|
||||
ClassificationService classificationService;
|
||||
|
||||
@ -345,14 +344,14 @@ public class LayoutParsingPipeline {
|
||||
|
||||
classificationService.classify(classificationDocument, layoutParsingType, identifier);
|
||||
|
||||
TableOfContents tableOfContents = outlineValidationService.createToC(classificationDocument);
|
||||
classificationDocument.setTableOfContents(tableOfContents);
|
||||
SectionTree sectionTree = outlineValidationService.createSectionTree(classificationDocument);
|
||||
classificationDocument.setSectionTree(sectionTree);
|
||||
|
||||
log.info("Building Sections for {}", identifier);
|
||||
|
||||
switch (layoutParsingType) {
|
||||
case CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_PARAGRAPH_DEBUG -> sectionsBuilderService.buildParagraphDebugSections(classificationDocument);
|
||||
default -> tocEnrichmentService.assignSectionBlocksAndImages(classificationDocument);
|
||||
default -> sectionTreeBuilderService.assignSectionBlocksAndImages(classificationDocument);
|
||||
}
|
||||
|
||||
return classificationDocument;
|
||||
|
||||
@ -4,7 +4,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;
|
||||
@ -31,6 +31,6 @@ public class ClassificationDocument {
|
||||
private long rulesVersion;
|
||||
|
||||
private OutlineObjectTree outlineObjectTree;
|
||||
private TableOfContents tableOfContents;
|
||||
private SectionTree sectionTree;
|
||||
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@ public enum PageBlockType {
|
||||
PARAGRAPH_ITALIC,
|
||||
PARAGRAPH_UNKNOWN,
|
||||
OTHER,
|
||||
TABLE_OF_CONTENTS_HEADLINE,
|
||||
TABLE_OF_CONTENTS_ITEM,
|
||||
LIST_ITEM,
|
||||
TABLE;
|
||||
@ -35,7 +36,7 @@ public enum PageBlockType {
|
||||
public static int getHeadlineNumber(PageBlockType pageBlockType) {
|
||||
|
||||
return switch (pageBlockType) {
|
||||
case H1 -> 1;
|
||||
case H1, TABLE_OF_CONTENTS_HEADLINE -> 1;
|
||||
case H2 -> 2;
|
||||
case H3 -> 3;
|
||||
case H4 -> 4;
|
||||
@ -47,6 +48,6 @@ public enum PageBlockType {
|
||||
|
||||
public boolean isHeadline() {
|
||||
|
||||
return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6);
|
||||
return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6) || this.equals(TABLE_OF_CONTENTS_HEADLINE);
|
||||
}
|
||||
}
|
||||
|
||||
@ -11,6 +11,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
|
||||
|
||||
public abstract class AbstractNodeVisitor implements NodeVisitor {
|
||||
|
||||
@ -83,6 +85,18 @@ public abstract class AbstractNodeVisitor implements NodeVisitor {
|
||||
visitChildren(tableCell);
|
||||
}
|
||||
|
||||
/**
 * Default handling for a {@link TableOfContents} node: delegates to
 * {@code visitChildren} with the node itself.
 *
 * @param toc the table-of-contents node being visited
 */
@Override
|
||||
public void visit(TableOfContents toc) {
|
||||
|
||||
visitChildren(toc);
|
||||
}
|
||||
|
||||
/**
 * Default handling for a {@link TableOfContentsItem} node: delegates to
 * {@code visitChildren} with the node itself.
 *
 * @param toci the table-of-contents item node being visited
 */
@Override
|
||||
public void visit(TableOfContentsItem toci) {
|
||||
|
||||
visitChildren(toci);
|
||||
}
|
||||
|
||||
|
||||
protected void visitChildren(SemanticNode semanticNode) {
|
||||
|
||||
|
||||
@ -10,6 +10,10 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
|
||||
|
||||
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;
|
||||
|
||||
public interface NodeVisitor {
|
||||
|
||||
@ -42,4 +46,10 @@ public interface NodeVisitor {
|
||||
|
||||
void visit(TableCell tableCell);
|
||||
|
||||
|
||||
void visit(TableOfContents tableOfContents);
|
||||
|
||||
|
||||
void visit(TableOfContentsItem tableOfContentsItem);
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,41 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class TableOfContents extends AbstractSemanticNode {
|
||||
|
||||
@Override
|
||||
public NodeTypeProto.NodeType getType() {
|
||||
|
||||
return NodeTypeProto.NodeType.TABLE_OF_CONTENTS;
|
||||
}
|
||||
|
||||
|
||||
public Headline getHeadline() {
|
||||
|
||||
return streamChildrenOfType(NodeTypeProto.NodeType.HEADLINE).map(node -> (Headline) node)
|
||||
.findFirst()
|
||||
.orElseGet(() -> getParent().getHeadline());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,51 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class TableOfContentsItem extends AbstractSemanticNode {
|
||||
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
|
||||
@Override
|
||||
public NodeTypeProto.NodeType getType() {
|
||||
|
||||
return NodeTypeProto.NodeType.TABLE_OF_CONTENTS_ITEM;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isLeaf() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextBlock getTextBlock() {
|
||||
|
||||
return leafTextBlock;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,5 +1,6 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.outline;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.TABLE_OF_CONTENTS_HEADLINE;
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -21,20 +22,20 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class OutlineValidationService {
|
||||
|
||||
@Observed(name = "OutlineValidationService", contextualName = "create-toc")
|
||||
public TableOfContents createToC(ClassificationDocument classificationDocument) {
|
||||
public SectionTree createSectionTree(ClassificationDocument classificationDocument) {
|
||||
|
||||
List<TextPageBlock> headlines = extractHeadlines(classificationDocument);
|
||||
|
||||
List<TableOfContentItem> mainSections = new ArrayList<>();
|
||||
Map<Integer, TableOfContentItem> lastItemsPerDepth = new HashMap<>();
|
||||
TableOfContentItem last = null;
|
||||
List<SectionTreeEntry> mainSections = new ArrayList<>();
|
||||
Map<Integer, SectionTreeEntry> lastItemsPerDepth = new HashMap<>();
|
||||
SectionTreeEntry last = null;
|
||||
TreeSet<Integer> depths = new TreeSet<>();
|
||||
|
||||
for (TextPageBlock current : headlines) {
|
||||
int currentDepth = getHeadlineNumber(current.getClassification());
|
||||
Integer parentDepth = depths.floor(currentDepth - 1);
|
||||
|
||||
var tocItem = new TableOfContentItem(current);
|
||||
var tocItem = new SectionTreeEntry(current);
|
||||
|
||||
if (parentDepth == null) {
|
||||
mainSections.add(tocItem);
|
||||
@ -44,14 +45,16 @@ public class OutlineValidationService {
|
||||
} else {
|
||||
assert last != null;
|
||||
int lastDepth = getHeadlineNumber(last.getHeadline().getClassification());
|
||||
|
||||
if (lastDepth < parentDepth) {
|
||||
if (last.getHeadline().getClassification().equals(TABLE_OF_CONTENTS_HEADLINE) && !current.getClassification().equals(TABLE_OF_CONTENTS_HEADLINE)) {
|
||||
// headline after toc should always start a main section
|
||||
parentDepth = 1;
|
||||
} else if (lastDepth < parentDepth) {
|
||||
parentDepth = lastDepth;
|
||||
} else if (lastDepth == currentDepth && last.getParent() != null) {
|
||||
parentDepth = getHeadlineNumber(last.getParent().getHeadline().getClassification());
|
||||
}
|
||||
|
||||
TableOfContentItem parent = lastItemsPerDepth.get(parentDepth);
|
||||
SectionTreeEntry parent = lastItemsPerDepth.get(parentDepth);
|
||||
parent.addChild(tocItem);
|
||||
}
|
||||
|
||||
@ -60,7 +63,10 @@ public class OutlineValidationService {
|
||||
depths.add(currentDepth);
|
||||
}
|
||||
|
||||
return new TableOfContents(mainSections);
|
||||
return new
|
||||
|
||||
SectionTree(mainSections);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -14,12 +14,12 @@ import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class TableOfContents implements Iterable<TableOfContentItem> {
|
||||
public class SectionTree implements Iterable<SectionTreeEntry> {
|
||||
|
||||
private List<TableOfContentItem> mainSections = new ArrayList<>();
|
||||
private List<SectionTreeEntry> mainSections = new ArrayList<>();
|
||||
|
||||
|
||||
public TableOfContents(List<TableOfContentItem> mainSections) {
|
||||
public SectionTree(List<SectionTreeEntry> mainSections) {
|
||||
|
||||
this.mainSections = mainSections;
|
||||
}
|
||||
@ -28,36 +28,36 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
|
||||
public List<TextPageBlock> getAllTextPageBlocks() {
|
||||
|
||||
List<TextPageBlock> allTextPageBlocks = new ArrayList<>();
|
||||
for (TableOfContentItem item : mainSections) {
|
||||
for (SectionTreeEntry item : mainSections) {
|
||||
collectTextPageBlocks(item, allTextPageBlocks);
|
||||
}
|
||||
return allTextPageBlocks;
|
||||
}
|
||||
|
||||
|
||||
private void collectTextPageBlocks(TableOfContentItem item, List<TextPageBlock> textPageBlocks) {
|
||||
private void collectTextPageBlocks(SectionTreeEntry item, List<TextPageBlock> textPageBlocks) {
|
||||
|
||||
textPageBlocks.add(item.getHeadline());
|
||||
for (TableOfContentItem child : item.getChildren()) {
|
||||
for (SectionTreeEntry child : item.getChildren()) {
|
||||
collectTextPageBlocks(child, textPageBlocks);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<TableOfContentItem> getAllTableOfContentItems() {
|
||||
public List<SectionTreeEntry> getAllTableOfContentItems() {
|
||||
|
||||
List<TableOfContentItem> allItems = new ArrayList<>();
|
||||
for (TableOfContentItem item : mainSections) {
|
||||
List<SectionTreeEntry> allItems = new ArrayList<>();
|
||||
for (SectionTreeEntry item : mainSections) {
|
||||
collectTableOfContentItems(item, allItems);
|
||||
}
|
||||
return allItems;
|
||||
}
|
||||
|
||||
|
||||
private void collectTableOfContentItems(TableOfContentItem item, List<TableOfContentItem> allItems) {
|
||||
private void collectTableOfContentItems(SectionTreeEntry item, List<SectionTreeEntry> allItems) {
|
||||
|
||||
allItems.add(item);
|
||||
for (TableOfContentItem child : item.getChildren()) {
|
||||
for (SectionTreeEntry child : item.getChildren()) {
|
||||
collectTableOfContentItems(child, allItems);
|
||||
}
|
||||
}
|
||||
@ -65,7 +65,7 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
|
||||
|
||||
private boolean containsBlock(TextPageBlock block) {
|
||||
|
||||
for (TableOfContentItem existingItem : this.getMainSections()) {
|
||||
for (SectionTreeEntry existingItem : this.getMainSections()) {
|
||||
if (existingItem.getHeadline().equals(block) || existingItem.contains(block)) {
|
||||
return true;
|
||||
}
|
||||
@ -74,9 +74,9 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
|
||||
}
|
||||
|
||||
|
||||
private boolean containsItem(TableOfContentItem tocItem) {
|
||||
private boolean containsItem(SectionTreeEntry tocItem) {
|
||||
|
||||
for (TableOfContentItem existingItem : this.getMainSections()) {
|
||||
for (SectionTreeEntry existingItem : this.getMainSections()) {
|
||||
if (existingItem.equals(tocItem) || existingItem.contains(tocItem)) {
|
||||
return true;
|
||||
}
|
||||
@ -86,18 +86,18 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
|
||||
|
||||
|
||||
@Override
|
||||
public @NonNull Iterator<TableOfContentItem> iterator() {
|
||||
public @NonNull Iterator<SectionTreeEntry> iterator() {
|
||||
|
||||
return new TableOfContentItemIterator(mainSections);
|
||||
return new SectionTreeEntryIterator(mainSections);
|
||||
}
|
||||
|
||||
|
||||
private static class TableOfContentItemIterator implements Iterator<TableOfContentItem> {
|
||||
private static class SectionTreeEntryIterator implements Iterator<SectionTreeEntry> {
|
||||
|
||||
private final Stack<Iterator<TableOfContentItem>> stack = new Stack<>();
|
||||
private final Stack<Iterator<SectionTreeEntry>> stack = new Stack<>();
|
||||
|
||||
|
||||
TableOfContentItemIterator(List<TableOfContentItem> mainSections) {
|
||||
SectionTreeEntryIterator(List<SectionTreeEntry> mainSections) {
|
||||
|
||||
stack.push(mainSections.iterator());
|
||||
}
|
||||
@ -112,10 +112,10 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
|
||||
|
||||
|
||||
@Override
|
||||
public TableOfContentItem next() {
|
||||
public SectionTreeEntry next() {
|
||||
|
||||
ensureStackTopIsCurrent();
|
||||
TableOfContentItem currentItem = stack.peek().next();
|
||||
SectionTreeEntry currentItem = stack.peek().next();
|
||||
if (currentItem.getChildren() != null && !currentItem.getChildren().isEmpty()) {
|
||||
stack.push(currentItem.getChildren()
|
||||
.iterator());
|
||||
@ -23,28 +23,28 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
public class TOCEnrichmentService {
|
||||
public class SectionTreeBuilderService {
|
||||
|
||||
public void assignSectionBlocksAndImages(ClassificationDocument document) {
|
||||
|
||||
TableOfContents toc = document.getTableOfContents();
|
||||
Iterator<TableOfContentItem> iterator = toc.iterator();
|
||||
TableOfContentItem currentTOCItem = null;
|
||||
SectionTree toc = document.getSectionTree();
|
||||
Iterator<SectionTreeEntry> iterator = toc.iterator();
|
||||
SectionTreeEntry currentTOCItem = null;
|
||||
if (iterator.hasNext()) {
|
||||
currentTOCItem = iterator.next();
|
||||
}
|
||||
List<AbstractPageBlock> startBlocks = new ArrayList<>();
|
||||
List<ClassifiedImage> startImages = new ArrayList<>();
|
||||
TableOfContentItem currentSection = null;
|
||||
SectionTreeEntry currentSection = null;
|
||||
boolean foundFirstHeadline = false;
|
||||
|
||||
List<ClassificationHeader> headers = new ArrayList<>();
|
||||
List<ClassificationFooter> footers = new ArrayList<>();
|
||||
TablePageBlock previousTable = null;
|
||||
List<TableOfContentItem> lastFoundTOCItems = new ArrayList<>();
|
||||
List<SectionTreeEntry> lastFoundTOCItems = new ArrayList<>();
|
||||
|
||||
for (ClassificationPage page : document.getPages()) {
|
||||
List<TableOfContentItem> currentPageTOCItems = new ArrayList<>();
|
||||
List<SectionTreeEntry> currentPageTOCItems = new ArrayList<>();
|
||||
List<TextPageBlock> header = new ArrayList<>();
|
||||
List<TextPageBlock> footer = new ArrayList<>();
|
||||
for (AbstractPageBlock current : page.getTextBlocks()) {
|
||||
@ -101,7 +101,7 @@ public class TOCEnrichmentService {
|
||||
Double xMax = null;
|
||||
Double yMax = null;
|
||||
|
||||
for (TableOfContentItem tocItem : lastFoundTOCItems) {
|
||||
for (SectionTreeEntry tocItem : lastFoundTOCItems) {
|
||||
var headline = tocItem.getHeadline();
|
||||
|
||||
if (headline.getPage() != page.getPageNumber()) {
|
||||
@ -169,10 +169,10 @@ public class TOCEnrichmentService {
|
||||
}
|
||||
|
||||
if (!startBlocks.isEmpty() || !startImages.isEmpty()) {
|
||||
TableOfContentItem unassigned = new TableOfContentItem(null);
|
||||
SectionTreeEntry unassigned = new SectionTreeEntry(null);
|
||||
unassigned.setSectionBlocks(startBlocks);
|
||||
unassigned.setImages(startImages);
|
||||
document.getTableOfContents().getMainSections().add(0, unassigned);
|
||||
document.getSectionTree().getMainSections().add(0, unassigned);
|
||||
}
|
||||
document.setHeaders(headers);
|
||||
document.setFooters(footers);
|
||||
@ -5,6 +5,7 @@ import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
@ -14,12 +15,18 @@ import lombok.EqualsAndHashCode;
|
||||
|
||||
@Data
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class TableOfContentItem {
|
||||
public class SectionTreeEntry {
|
||||
|
||||
public enum Type {
|
||||
SECTION,
|
||||
SUPER_SECTION,
|
||||
TOC_SECTION
|
||||
}
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
private TextPageBlock headline;
|
||||
private List<TableOfContentItem> children = new ArrayList<>();
|
||||
private TableOfContentItem parent;
|
||||
private List<SectionTreeEntry> children = new ArrayList<>();
|
||||
private SectionTreeEntry parent;
|
||||
|
||||
private List<AbstractPageBlock> sectionBlocks = new ArrayList<>();
|
||||
private List<ClassifiedImage> images = new ArrayList<>();
|
||||
@ -27,20 +34,32 @@ public class TableOfContentItem {
|
||||
private GenericSemanticNode section;
|
||||
|
||||
|
||||
public TableOfContentItem(TextPageBlock headline) {
|
||||
public SectionTreeEntry(TextPageBlock headline) {
|
||||
|
||||
this.headline = headline;
|
||||
}
|
||||
|
||||
|
||||
public void addChild(TableOfContentItem tableOfContentItem) {
|
||||
public Type getType() {
|
||||
|
||||
children.add(tableOfContentItem);
|
||||
tableOfContentItem.setParent(this);
|
||||
if (headline.getClassification().equals(PageBlockType.TABLE_OF_CONTENTS_HEADLINE)) {
|
||||
return Type.TOC_SECTION;
|
||||
}
|
||||
if (children.isEmpty()) {
|
||||
return Type.SECTION;
|
||||
}
|
||||
return Type.SUPER_SECTION;
|
||||
}
|
||||
|
||||
|
||||
public TableOfContentItem getSiblingBefore() {
|
||||
public void addChild(SectionTreeEntry sectionTreeEntry) {
|
||||
|
||||
children.add(sectionTreeEntry);
|
||||
sectionTreeEntry.setParent(this);
|
||||
}
|
||||
|
||||
|
||||
public SectionTreeEntry getSiblingBefore() {
|
||||
|
||||
if (parent != null) {
|
||||
int index = parent.getChildren().indexOf(this);
|
||||
@ -52,7 +71,7 @@ public class TableOfContentItem {
|
||||
}
|
||||
|
||||
|
||||
public TableOfContentItem getSiblingAfter() {
|
||||
public SectionTreeEntry getSiblingAfter() {
|
||||
|
||||
if (parent != null) {
|
||||
int index = parent.getChildren().indexOf(this);
|
||||
@ -69,7 +88,7 @@ public class TableOfContentItem {
|
||||
if (headline.equals(block)) {
|
||||
return true;
|
||||
}
|
||||
for (TableOfContentItem child : children) {
|
||||
for (SectionTreeEntry child : children) {
|
||||
if (child.contains(block)) {
|
||||
return true;
|
||||
}
|
||||
@ -78,12 +97,12 @@ public class TableOfContentItem {
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(TableOfContentItem tocItem) {
|
||||
public boolean contains(SectionTreeEntry tocItem) {
|
||||
|
||||
if (this.equals(tocItem)) {
|
||||
return true;
|
||||
}
|
||||
for (TableOfContentItem child : children) {
|
||||
for (SectionTreeEntry child : children) {
|
||||
if (child.contains(tocItem)) {
|
||||
return true;
|
||||
}
|
||||
@ -61,7 +61,7 @@ public class TableOfContentsClassificationService {
|
||||
|
||||
if (end > i + 1) {
|
||||
if (textBlock.textBlock().getClassification() == null) {
|
||||
textBlock.textBlock().setClassification(PageBlockType.H1);
|
||||
textBlock.textBlock().setClassification(PageBlockType.TABLE_OF_CONTENTS_HEADLINE);
|
||||
}
|
||||
i = end;
|
||||
}
|
||||
|
||||
@ -23,6 +23,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.AbstractSemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
@ -35,10 +36,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Im
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContentItem;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEntry;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
|
||||
@ -65,7 +67,7 @@ public class DocumentGraphFactory {
|
||||
|
||||
document.getPages()
|
||||
.forEach(context::buildAndAddPageWithCounter);
|
||||
addSectionsForToC(layoutParsingType, document, context, documentGraph);
|
||||
addSections(layoutParsingType, document, context, documentGraph);
|
||||
addHeaderAndFooterToEachPage(document, context);
|
||||
|
||||
documentGraph.setNumberOfPages(context.pages.size());
|
||||
@ -92,18 +94,18 @@ public class DocumentGraphFactory {
|
||||
}
|
||||
|
||||
|
||||
private void addSectionsForToC(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
|
||||
private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
|
||||
|
||||
for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
|
||||
GenericSemanticNode parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();
|
||||
for (SectionTreeEntry sectionTreeEntry : classificationDocument.getSectionTree()) {
|
||||
GenericSemanticNode parent = sectionTreeEntry.getParent() == null ? null : sectionTreeEntry.getParent().getSection();
|
||||
Optional<GenericSemanticNode> section = SectionNodeFactory.addSection(layoutParsingType,
|
||||
parent,
|
||||
tocItem.getChildren().isEmpty(),
|
||||
tocItem.getNonEmptySectionBlocks(),
|
||||
tocItem.getImages(),
|
||||
sectionTreeEntry.getType(),
|
||||
sectionTreeEntry.getNonEmptySectionBlocks(),
|
||||
sectionTreeEntry.getImages(),
|
||||
context,
|
||||
document);
|
||||
tocItem.setSection(section.orElse(null));
|
||||
sectionTreeEntry.setSection(section.orElse(null));
|
||||
}
|
||||
}
|
||||
|
||||
@ -121,6 +123,8 @@ public class DocumentGraphFactory {
|
||||
node = Headline.builder().documentTree(context.getDocumentTree()).build();
|
||||
} else if (originalTextBlock.isToDuplicate() && layoutParsingType.equals(LayoutParsingType.REDACT_MANAGER)) {
|
||||
node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree()).build();
|
||||
} else if (originalTextBlock.getClassification().equals(PageBlockType.TABLE_OF_CONTENTS_ITEM)) {
|
||||
node = TableOfContentsItem.builder().documentTree(context.getDocumentTree()).build();
|
||||
} else {
|
||||
node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
|
||||
}
|
||||
|
||||
@ -17,7 +17,9 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEntry;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.TableMergingUtility;
|
||||
@ -29,7 +31,7 @@ public class SectionNodeFactory {
|
||||
|
||||
public Optional<GenericSemanticNode> addSection(LayoutParsingType layoutParsingType,
|
||||
GenericSemanticNode parentNode,
|
||||
boolean isLeaf,
|
||||
SectionTreeEntry.Type type,
|
||||
List<AbstractPageBlock> pageBlocks,
|
||||
List<ClassifiedImage> images,
|
||||
DocumentGraphFactory.Context context,
|
||||
@ -48,12 +50,11 @@ public class SectionNodeFactory {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
AbstractSemanticNode section;
|
||||
if (isLeaf) {
|
||||
section = Section.builder().documentTree(context.getDocumentTree()).build();
|
||||
} else {
|
||||
section = SuperSection.builder().documentTree(context.getDocumentTree()).build();
|
||||
}
|
||||
AbstractSemanticNode section = switch (type) {
|
||||
case SECTION -> Section.builder().documentTree(context.getDocumentTree()).build();
|
||||
case SUPER_SECTION -> SuperSection.builder().documentTree(context.getDocumentTree()).build();
|
||||
case TOC_SECTION -> TableOfContents.builder().documentTree(context.getDocumentTree()).build();
|
||||
};
|
||||
|
||||
context.getSections().add(section);
|
||||
|
||||
@ -64,13 +65,14 @@ public class SectionNodeFactory {
|
||||
if (containsTablesAndTextBlocks) {
|
||||
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
|
||||
section,
|
||||
true,
|
||||
SectionTreeEntry.Type.SECTION,
|
||||
subSectionPageBlocks,
|
||||
emptyList(),
|
||||
context,
|
||||
document));
|
||||
} else if (!isLeaf) {
|
||||
addSection(layoutParsingType, section, true, pageBlocks, emptyList(), context, document);
|
||||
} else if (type.equals(SectionTreeEntry.Type.SUPER_SECTION)) {
|
||||
// If a SuperSection contains more blocks than just a headline, we add a Section which contains the remaining textblocks.
|
||||
addSection(layoutParsingType, section, SectionTreeEntry.Type.SECTION, pageBlocks, emptyList(), context, document);
|
||||
} else {
|
||||
addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section, document);
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEntry;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
|
||||
@ -120,7 +121,7 @@ public class TableNodeFactory {
|
||||
} else if (firstTextBlockIsHeadline(cell)) {
|
||||
SectionNodeFactory.addSection(layoutParsingType,
|
||||
tableCell,
|
||||
true,
|
||||
SectionTreeEntry.Type.SECTION,
|
||||
cell.getTextBlocks()
|
||||
.stream()
|
||||
.map(tb -> (AbstractPageBlock) tb)
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.AllDocumentPages;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.DocumentPage;
|
||||
@ -20,7 +21,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Do
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.DocumentPositionData.Position;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.mapper;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -9,9 +8,9 @@ import java.util.NoSuchElementException;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.AllDocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto.AllDocumentTextData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
|
||||
@ -26,6 +25,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContentsItem;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
|
||||
@ -70,13 +71,15 @@ public class DocumentGraphMapper {
|
||||
SemanticNode node = switch (entryData.getType()) {
|
||||
case SECTION -> buildSection(context);
|
||||
case SUPER_SECTION -> buildSuperSection(context);
|
||||
case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
|
||||
case PARAGRAPH -> buildParagraph(context, entryData.getPropertiesMap());
|
||||
case HEADLINE -> buildHeadline(context);
|
||||
case HEADER -> buildHeader(context);
|
||||
case FOOTER -> buildFooter(context);
|
||||
case TABLE -> buildTable(context, entryData.getProperties());
|
||||
case TABLE_CELL -> buildTableCell(context, entryData.getProperties());
|
||||
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbersList());
|
||||
case TABLE -> buildTable(context, entryData.getPropertiesMap());
|
||||
case TABLE_CELL -> buildTableCell(context, entryData.getPropertiesMap());
|
||||
case IMAGE -> buildImage(context, entryData.getPropertiesMap(), entryData.getPageNumbersList());
|
||||
case TABLE_OF_CONTENTS -> buildTableOfContents(context);
|
||||
case TABLE_OF_CONTENTS_ITEM -> buildTableOfContentsItem(context);
|
||||
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
|
||||
};
|
||||
|
||||
@ -100,6 +103,18 @@ public class DocumentGraphMapper {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates the graph node for a TABLE_OF_CONTENTS entry.
 * The node is produced through the TableOfContents builder; the only value set here is the
 * document tree taken from the given context.
 *
 * @param context mapping context that supplies the document tree
 * @return the newly built TableOfContents node
 */
private static SemanticNode buildTableOfContents(Context context) {
|
||||
|
||||
return TableOfContents.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates the graph node for a TABLE_OF_CONTENTS_ITEM entry.
 * The node is produced through the TableOfContentsItem builder; the only value set here is the
 * document tree taken from the given context.
 *
 * @param context mapping context that supplies the document tree
 * @return the newly built TableOfContentsItem node
 */
private static SemanticNode buildTableOfContentsItem(Context context) {
|
||||
|
||||
return TableOfContentsItem.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
private Headline buildHeadline(Context context) {
|
||||
|
||||
return Headline.builder().documentTree(context.documentTree).build();
|
||||
@ -182,13 +197,11 @@ public class DocumentGraphMapper {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
|
||||
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextDataBlockData.getDocumentTextDataList()
|
||||
.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.atomicPositionBlockData.getDocumentPositionDataList()
|
||||
.get(Math.toIntExact(atomicTextBlockId)),
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextDataBlockData.getDocumentTextDataList().get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.atomicPositionBlockData.getDocumentPositionDataList().get(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.documentTextDataBlockData.getDocumentTextDataList()
|
||||
.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
getPage(context.documentTextDataBlockData.getDocumentTextDataList().get(Math.toIntExact(atomicTextBlockId)).getPage(),
|
||||
context));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -38,6 +38,7 @@ public class LayoutGridService {
|
||||
layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue);
|
||||
|
||||
document.getLayoutDebugLayer().addSentenceVisualization(document.getTextBlock());
|
||||
document.getLayoutDebugLayer().addOutlineHeadlines(document);
|
||||
|
||||
if (document.getLayoutDebugLayer().isActive()) {
|
||||
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid, document.getLayoutDebugLayer()), outline);
|
||||
@ -54,12 +55,13 @@ public class LayoutGridService {
|
||||
.peek(layoutGrid::addTreeId)
|
||||
.forEach(semanticNode -> {
|
||||
switch (semanticNode.getType()) {
|
||||
case SECTION, SUPER_SECTION -> layoutGrid.addSection(semanticNode);
|
||||
case SECTION, SUPER_SECTION, TABLE_OF_CONTENTS -> layoutGrid.addSection(semanticNode);
|
||||
case HEADLINE -> layoutGrid.addHeadline((Headline) semanticNode);
|
||||
case PARAGRAPH -> layoutGrid.addParagraph((Paragraph) semanticNode);
|
||||
case TABLE -> layoutGrid.addTable((Table) semanticNode);
|
||||
case IMAGE -> layoutGrid.addImage((Image) semanticNode);
|
||||
case HEADER, FOOTER -> layoutGrid.addHeaderOrFooter(semanticNode);
|
||||
case TABLE_OF_CONTENTS_ITEM -> layoutGrid.addTableOfContentsItem(semanticNode);
|
||||
}
|
||||
});
|
||||
return layoutGrid;
|
||||
|
||||
@ -111,8 +111,8 @@ public class PdfVisualisationUtility {
|
||||
return DrawingOptions.builder().stroke(true).strokeColor(switch (entry.getType()) {
|
||||
case DOCUMENT -> Color.LIGHT_GRAY;
|
||||
case HEADER, FOOTER -> Color.GREEN;
|
||||
case PARAGRAPH -> Color.BLUE;
|
||||
case SUPER_SECTION, SECTION -> Color.BLACK;
|
||||
case PARAGRAPH, TABLE_OF_CONTENTS_ITEM -> Color.BLUE;
|
||||
case SUPER_SECTION, SECTION, TABLE_OF_CONTENTS -> Color.BLACK;
|
||||
case HEADLINE -> Color.RED;
|
||||
case TABLE -> Color.ORANGE;
|
||||
case TABLE_CELL -> Color.GRAY;
|
||||
|
||||
@ -15,13 +15,17 @@ import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||
import org.checkerframework.checker.units.qual.C;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Line;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
@ -379,4 +383,19 @@ public class LayoutDebugLayer extends LayoutDebugLayerConfig {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Adds a rectangle to the outline-headlines visualization for every headline that carries the
 * OUTLINE layout engine.
 * <p>
 * Returns immediately when {@code active} is false. Otherwise all sub nodes of the document are
 * streamed and reduced to nodes of type HEADLINE whose engines contain
 * {@code LayoutEngine.OUTLINE}. For each (page, bounding box) entry of such a headline, a
 * ColoredRectangle built from the bounding box, HEADLINE_COLOR and LINE_WIDTH is appended to the
 * colored rectangles obtained for that page number from {@code outlineHeadlines}.
 *
 * @param document the document whose sub nodes are scanned for headlines
 */
public void addOutlineHeadlines(Document document) {
|
||||
|
||||
// Skip all work when this debug layer is not active.
if (!active) {
|
||||
return;
|
||||
}
|
||||
|
||||
document.streamAllSubNodes()
|
||||
.filter(node -> node.getType().equals(NodeTypeProto.NodeType.HEADLINE))
|
||||
.filter(node -> node.getEngines().contains(LayoutEngineProto.LayoutEngine.OUTLINE))
|
||||
.forEach(headline -> headline.getBBox()
|
||||
.forEach((page, bbox) -> getOrCreateVisualizationsOnPage(page.getNumber(), this.outlineHeadlines).getColoredRectangles()
|
||||
.add(new ColoredRectangle(bbox, HEADLINE_COLOR, LINE_WIDTH))));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -26,6 +26,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
|
||||
@ -72,10 +73,12 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
|
||||
public void addHeadline(Headline headline) {
|
||||
|
||||
addAsRectangle(headline, headlines, HEADLINE_COLOR);
|
||||
if (headline.getEngines().contains(LayoutEngine.OUTLINE)) {
|
||||
addAsRectangle(headline, outlineHeadlines, HEADLINE_COLOR);
|
||||
if (headline.getParent().getType().equals(NodeTypeProto.NodeType.TABLE_OF_CONTENTS)) {
|
||||
addAsRectangle(headline, toc, HEADLINE_COLOR);
|
||||
} else {
|
||||
addAsRectangle(headline, headlines, HEADLINE_COLOR);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -88,19 +91,10 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
public void addTreeId(SemanticNode semanticNode) {
|
||||
|
||||
Page page = semanticNode.getFirstPage();
|
||||
if (semanticNode.getBBox()
|
||||
.get(page) == null) {
|
||||
if (semanticNode.getBBox().get(page) == null) {
|
||||
return;
|
||||
}
|
||||
addPlacedText(page,
|
||||
semanticNode.getBBox()
|
||||
.get(page),
|
||||
semanticNode.getBBox()
|
||||
.get(page),
|
||||
buildTreeIdString(semanticNode),
|
||||
1,
|
||||
treeIds,
|
||||
TREEID_COLOR);
|
||||
addPlacedText(page, semanticNode.getBBox().get(page), semanticNode.getBBox().get(page), buildTreeIdString(semanticNode), 1, treeIds, TREEID_COLOR);
|
||||
}
|
||||
|
||||
|
||||
@ -124,20 +118,19 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
public void addSection(SemanticNode section) {
|
||||
|
||||
Map<Page, Rectangle2D> bBoxMap = section.getBBox();
|
||||
|
||||
Color color = section.getType().equals(NodeTypeProto.NodeType.TABLE_OF_CONTENTS) ? TOC_COLOR : SECTION_COLOR;
|
||||
List<SemanticNode> subSections = section.streamAllSubNodesOfType(NodeTypeProto.NodeType.SECTION)
|
||||
.toList();
|
||||
Integer maxChildDepth = subSections.stream()
|
||||
.map(node -> node.getTreeId().size())
|
||||
.max(Integer::compareTo)
|
||||
.orElse(section.getTreeId().size());
|
||||
.max(Integer::compareTo).orElse(section.getTreeId().size());
|
||||
int ownDepth = section.getTreeId().size();
|
||||
|
||||
Page firstPage = section.getFirstPage();
|
||||
String treeIdString = buildTreeIdString(section);
|
||||
|
||||
if (bBoxMap.values().size() == 1) {
|
||||
handleSinglePage(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth);
|
||||
handleSinglePage(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth, color);
|
||||
return;
|
||||
}
|
||||
List<Page> pagesInOrder = bBoxMap.keySet()
|
||||
@ -145,12 +138,12 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
.sorted(Comparator.comparingInt(Page::getNumber))
|
||||
.collect(Collectors.toList());
|
||||
pagesInOrder.remove(0);
|
||||
handleFirstPageOfSection(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth);
|
||||
handleFirstPageOfSection(section, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth, color);
|
||||
for (Page middlePage : pagesInOrder.subList(0, pagesInOrder.size() - 1)) {
|
||||
handleForMiddlePageOfSection(section, middlePage, bBoxMap.get(middlePage), treeIdString, maxChildDepth, ownDepth);
|
||||
handleForMiddlePageOfSection(section, middlePage, bBoxMap.get(middlePage), treeIdString, maxChildDepth, ownDepth, color);
|
||||
}
|
||||
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
|
||||
handleLastPageOfSection(section, lastPage, bBoxMap.get(lastPage), treeIdString, maxChildDepth, ownDepth);
|
||||
handleLastPageOfSection(section, lastPage, bBoxMap.get(lastPage), treeIdString, maxChildDepth, ownDepth, color);
|
||||
}
|
||||
|
||||
|
||||
@ -232,33 +225,45 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
}
|
||||
|
||||
|
||||
private void handleSinglePage(SemanticNode semanticNode, Page page, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
|
||||
private void handleSinglePage(SemanticNode semanticNode, Page page, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth, Color color) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, page, rectangle2D, treeIdString, maxChildDepth, ownDepth);
|
||||
// add string to top line
|
||||
var firstLine = result.pageLines().remove(0);
|
||||
result.coloredLines().add(new ColoredLine(firstLine, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(firstLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void handleFirstPageOfSection(SemanticNode semanticNode, Page firstPage, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
|
||||
private void handleFirstPageOfSection(SemanticNode semanticNode,
|
||||
Page firstPage,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth,
|
||||
Color color) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, firstPage, rectangle2D, treeIdString, maxChildDepth, ownDepth);
|
||||
// remove bottom line
|
||||
result.pageLines().remove(2);
|
||||
// add string to top line
|
||||
var firstLine = result.pageLines().remove(0);
|
||||
result.coloredLines().add(new ColoredLine(firstLine, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(firstLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void handleForMiddlePageOfSection(SemanticNode semanticNode, Page middlePage, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
|
||||
private void handleForMiddlePageOfSection(SemanticNode semanticNode,
|
||||
Page middlePage,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth,
|
||||
Color color) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, middlePage, rectangle2D, treeIdString, maxChildDepth, ownDepth);
|
||||
// remove top line
|
||||
@ -267,23 +272,29 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
result.pageLines().remove(1);
|
||||
// add string to left line
|
||||
var leftLine = result.pageLines().remove(1);
|
||||
result.coloredLines().add(new ColoredLine(leftLine, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(leftLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void handleLastPageOfSection(SemanticNode semanticNode, Page lastPage, Rectangle2D rectangle2D, String treeIdString, Integer maxChildDepth, Integer ownDepth) {
|
||||
private void handleLastPageOfSection(SemanticNode semanticNode,
|
||||
Page lastPage,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth,
|
||||
Color color) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, lastPage, rectangle2D, treeIdString, maxChildDepth, ownDepth);
|
||||
// remove top line
|
||||
result.pageLines().remove(0);
|
||||
// add string to left line
|
||||
var leftLine = result.pageLines().remove(2);
|
||||
result.coloredLines().add(new ColoredLine(leftLine, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(leftLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, SECTION_COLOR, LINE_WIDTH));
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
@ -295,14 +306,14 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth) {
|
||||
|
||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), sections).getColoredLines();
|
||||
Visualizations visualizations = semanticNode.getType().equals(NodeTypeProto.NodeType.TABLE_OF_CONTENTS) ? toc : sections;
|
||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines();
|
||||
int lineWidthModifier = maxChildDepth - ownDepth;
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
|
||||
SemanticNode highestParent = semanticNode.getHighestParent();
|
||||
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
|
||||
addPlacedText(page, rectangle2D, highestParentRect, treeIdString, maxChildDepth, sections, SECTION_COLOR);
|
||||
addPlacedText(page, rectangle2D, highestParentRect, treeIdString, maxChildDepth, visualizations, SECTION_COLOR);
|
||||
var lastPageLines = createLinesFromRectangle(r, page.getRotation());
|
||||
|
||||
if (semanticNode instanceof SuperSection) {
|
||||
@ -347,8 +358,7 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
List<Double> ys = yStream.collect(Collectors.toList());
|
||||
ys.remove(0);
|
||||
|
||||
Rectangle2D tableBBox = table.getBBox()
|
||||
.get(page);
|
||||
Rectangle2D tableBBox = table.getBBox().get(page);
|
||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines();
|
||||
|
||||
xs.forEach(x -> {
|
||||
@ -384,6 +394,12 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Registers a table-of-contents item in the layout grid.
 * Delegates to {@code addAsRectangle} with the {@code toc} visualizations and PARAGRAPH_COLOR.
 *
 * @param semanticNode the table-of-contents item node to add
 */
public void addTableOfContentsItem(SemanticNode semanticNode) {
|
||||
|
||||
addAsRectangle(semanticNode, toc, PARAGRAPH_COLOR);
|
||||
}
|
||||
|
||||
|
||||
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
|
||||
|
||||
}
|
||||
|
||||
@ -45,7 +45,6 @@ dependencies {
|
||||
// for integration testing only
|
||||
testImplementation(project(":viewer-doc-processor"))
|
||||
testImplementation(project(":layoutparser-service-internal-api"))
|
||||
testImplementation("com.google.protobuf:protobuf-java-util:4.27.1")
|
||||
|
||||
testImplementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
|
||||
testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}")
|
||||
|
||||
@ -10,7 +10,6 @@ import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@ -28,7 +27,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Se
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
@ -100,10 +99,10 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
.flatMap(Collection::stream)
|
||||
.allMatch(OutlineObject::isFound));
|
||||
|
||||
TableOfContents tableOfContents = classificationDocument.getTableOfContents();
|
||||
SectionTree sectionTree = classificationDocument.getSectionTree();
|
||||
|
||||
assertEquals(tableOfContents.getMainSections().size(), 9);
|
||||
assertEquals(tableOfContents.getMainSections().subList(1, 9)
|
||||
assertEquals(sectionTree.getMainSections().size(), 9);
|
||||
assertEquals(sectionTree.getMainSections().subList(1, 9)
|
||||
.stream()
|
||||
.map(tableOfContentItem -> sanitizeString(tableOfContentItem.getHeadline().toString()))
|
||||
.toList(),
|
||||
@ -121,14 +120,14 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
// assertEquals(tableOfContents.getMainSections().get(6).getImages().size(), 1);
|
||||
// assertEquals(tableOfContents.getMainSections().get(8).getChildren().get(2).getChildren().get(0).getChildren().get(2).getImages().size(), 1);
|
||||
|
||||
assertTrue(tableOfContents.getAllTableOfContentItems()
|
||||
assertTrue(sectionTree.getAllTableOfContentItems()
|
||||
.stream()
|
||||
.allMatch(tableOfContentItem -> tableOfContentItem.getSection() != null));
|
||||
assertTrue(tableOfContents.getAllTableOfContentItems()
|
||||
assertTrue(sectionTree.getAllTableOfContentItems()
|
||||
.stream()
|
||||
.filter(tableOfContentItem -> tableOfContentItem.getChildren().isEmpty())
|
||||
.allMatch(tableOfContentItem -> tableOfContentItem.getSection() instanceof Section));
|
||||
assertTrue(tableOfContents.getAllTableOfContentItems()
|
||||
assertTrue(sectionTree.getAllTableOfContentItems()
|
||||
.stream()
|
||||
.filter(tableOfContentItem -> !tableOfContentItem.getChildren().isEmpty())
|
||||
.allMatch(tableOfContentItem -> tableOfContentItem.getSection() instanceof SuperSection));
|
||||
|
||||
@ -150,14 +150,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
|
||||
assertThat(document.getTableOfContents().getAllTableOfContentItems()
|
||||
assertThat(document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
|
||||
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
|
||||
.toList()).isNotEmpty();
|
||||
var tables = document.getTableOfContents().getAllTableOfContentItems()
|
||||
var tables = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -203,14 +203,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||
assertThat(document.getTableOfContents().getAllTableOfContentItems()
|
||||
assertThat(document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
|
||||
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
|
||||
.toList()).isNotEmpty();
|
||||
TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock table = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -233,14 +233,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
"files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||
assertThat(document.getTableOfContents().getAllTableOfContentItems()
|
||||
assertThat(document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
|
||||
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
|
||||
.toList()).isNotEmpty();
|
||||
TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock firstTable = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -250,7 +250,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
.get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||
TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock secondTable = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -280,14 +280,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
"files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||
assertThat(document.getTableOfContents().getAllTableOfContentItems()
|
||||
assertThat(document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
|
||||
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
|
||||
.toList()).isNotEmpty();
|
||||
TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock firstTable = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -297,7 +297,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
.get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(9);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(5);
|
||||
TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock secondTable = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -327,14 +327,14 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
"files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||
assertThat(document.getTableOfContents().getAllTableOfContentItems()
|
||||
assertThat(document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
.filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
|
||||
.map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
|
||||
.toList()).isNotEmpty();
|
||||
TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock firstTable = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -344,7 +344,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
.get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||
TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock secondTable = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -844,7 +844,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
private void toHtml(ClassificationDocument document, String filename) {
|
||||
|
||||
var tables = document.getTableOfContents().getAllTableOfContentItems()
|
||||
var tables = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -871,7 +871,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
|
||||
private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {
|
||||
|
||||
TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock table = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -901,7 +901,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
|
||||
private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {
|
||||
|
||||
TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
|
||||
TablePageBlock table = document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
@ -929,7 +929,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
|
||||
private void validateTableSize(ClassificationDocument document, int tableSize) {
|
||||
|
||||
assertThat(document.getTableOfContents().getAllTableOfContentItems()
|
||||
assertThat(document.getSectionTree().getAllTableOfContentItems()
|
||||
.stream()
|
||||
.flatMap(tocItem -> tocItem.getSectionBlocks()
|
||||
.stream()
|
||||
|
||||
@ -18,8 +18,8 @@ import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
|
||||
@ -227,9 +227,9 @@ public class PdfDraw {
|
||||
return DrawingOptions.builder().stroke(true).strokeColor(switch (entry.getType()) {
|
||||
case DOCUMENT -> Color.LIGHT_GRAY;
|
||||
case HEADER, FOOTER -> Color.GREEN;
|
||||
case PARAGRAPH -> Color.BLUE;
|
||||
case PARAGRAPH, TABLE_OF_CONTENTS_ITEM -> Color.BLUE;
|
||||
case HEADLINE -> Color.RED;
|
||||
case SECTION, SUPER_SECTION -> Color.BLACK;
|
||||
case SECTION, SUPER_SECTION, TABLE_OF_CONTENTS -> Color.BLACK;
|
||||
case TABLE -> Color.ORANGE;
|
||||
case TABLE_CELL -> Color.GRAY;
|
||||
case IMAGE -> Color.MAGENTA;
|
||||
|
||||
@ -40,7 +40,7 @@ public record LayerIdentifier(String name, String markedContentName) {
|
||||
public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs");
|
||||
public static final LayerIdentifier OUTLINE_HEADLINES = new LayerIdentifier("Outline Headlines", "OUTLINE_HEADLINES");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS");
|
||||
|
||||
//layout grid debug
|
||||
public static final LayerIdentifier KNECON_LAYOUT_DEBUG = new LayerIdentifier("Layout elements", "DEBUG_LAYOUT");
|
||||
@ -55,6 +55,7 @@ public record LayerIdentifier(String name, String markedContentName) {
|
||||
public static final LayerIdentifier NEIGHBOURS = new LayerIdentifier("Neighbours", "NEIGHBOURS");
|
||||
public static final LayerIdentifier CHARACTERS = new LayerIdentifier("Characters", "CHARACTERS");
|
||||
public static final LayerIdentifier OUTLINE_OBJECTS = new LayerIdentifier("Outline Positions", "OUTLINE_OBJECTS");
|
||||
public static final LayerIdentifier OUTLINE_HEADLINES = new LayerIdentifier("Outline Headlines", "OUTLINE_HEADLINES");
|
||||
public static final LayerIdentifier SENTENCES = new LayerIdentifier("Sentences", "SENTENCES");
|
||||
public static final LayerIdentifier TOC_PAGES = new LayerIdentifier("TOC pages", "TOC_PAGES");
|
||||
public static final LayerIdentifier TOC_BLOCKS = new LayerIdentifier("TOC blocks", "TOC_BLOCKS");
|
||||
|
||||
@ -32,6 +32,8 @@ public class LayoutDebugLayerConfig extends AbstractLayerGroup {
|
||||
protected static final Color UNDERLINE_RULING_COLOR = new Color(6, 39, 171);
|
||||
protected static final Color STRIKETROUGH_RULING_COLOR = new Color(171, 6, 6);
|
||||
|
||||
protected static final Color HEADLINE_COLOR = new Color(162, 56, 56);
|
||||
|
||||
protected static final Color CELLS_COLOR = new Color(31, 214, 27);
|
||||
protected static final Color OUTLINE_OBJECT_COLOR = new Color(214, 27, 183);
|
||||
|
||||
@ -62,7 +64,7 @@ public class LayoutDebugLayerConfig extends AbstractLayerGroup {
|
||||
protected final Visualizations tocPages = Visualizations.builder().layer(LayerIdentifier.TOC_PAGES).build();
|
||||
protected final Visualizations tocBlocks = Visualizations.builder().layer(LayerIdentifier.TOC_BLOCKS).build();
|
||||
protected final Visualizations listIdentifiers = Visualizations.builder().layer(LayerIdentifier.LIST_IDENTIFIERS).build();
|
||||
|
||||
protected final Visualizations outlineHeadlines = Visualizations.builder().layer(LayerIdentifier.OUTLINE_HEADLINES).build();
|
||||
|
||||
public List<Visualizations> getVisualizations() {
|
||||
|
||||
@ -78,6 +80,7 @@ public class LayoutDebugLayerConfig extends AbstractLayerGroup {
|
||||
mainBody, //
|
||||
markedContent, //
|
||||
outlineObjects, //
|
||||
outlineHeadlines, //
|
||||
tocPages, //
|
||||
tocBlocks, //
|
||||
listIdentifiers //
|
||||
|
||||
@ -34,8 +34,10 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
|
||||
protected static final Color KEY_VALUE_BBOX_COLOR = new Color(0, 39, 85);
|
||||
protected static final Color KEY_COLOR = new Color(30, 92, 172);
|
||||
protected static final Color VALUE_COLOR = new Color(30, 172, 146);
|
||||
protected static final Color TOC_COLOR = new Color(0, 86, 198);
|
||||
|
||||
protected final Visualizations sections = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_SECTION).visibleByDefault(true).build();
|
||||
protected final Visualizations toc = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TOC).visibleByDefault(true).build();
|
||||
protected final Visualizations paragraphs = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_PARAGRAPH).visibleByDefault(true).build();
|
||||
protected final Visualizations headlines = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_HEADLINE).visibleByDefault(true).build();
|
||||
protected final Visualizations tables = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TABLE).visibleByDefault(true).build();
|
||||
@ -44,12 +46,12 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
|
||||
protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build();
|
||||
protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build();
|
||||
protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build();
|
||||
protected final Visualizations outlineHeadlines = Visualizations.builder().layer(LayerIdentifier.OUTLINE_HEADLINES).build();
|
||||
|
||||
|
||||
@Override
|
||||
public List<Visualizations> getVisualizations() {
|
||||
|
||||
return List.of(headlines, paragraphs, tables, sections, headerFooter, keyValue, figures, images, treeIds, outlineHeadlines);
|
||||
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user