RED-9139: move document to its own module, add TableOfContents and TableOfContentsItem

This commit is contained in:
Kilian Schüttler 2024-11-14 16:50:42 +01:00
parent 41f824297c
commit f9d939958f
135 changed files with 13478 additions and 459 deletions

View File

@ -7,12 +7,12 @@ include:
ref: 'main'
file: 'ci-templates/gradle_java.yml'
deploy JavaDoc:
publish dependencies:
stage: deploy
tags:
- dind
script:
- echo "Building JavaDoc with gradle version ${BUILDVERSION}"
- echo "Publishing dependencies with gradle version ${BUILDVERSION}"
- gradle -Pversion=${BUILDVERSION} publish
- echo "BUILDVERSION=$(echo ${BUILDVERSION})" >> variables.env
artifacts:
@ -21,6 +21,7 @@ deploy JavaDoc:
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_BRANCH =~ /^release/
- if: $CI_COMMIT_BRANCH =~ /^feature/
- if: $CI_COMMIT_TAG
generate JavaDoc:
@ -42,7 +43,7 @@ pages:
stage: deploy
needs:
- generate JavaDoc
- deploy JavaDoc
- publish dependencies
- calculate minor version
pages:
path_prefix: "$BUILDVERSION"

View File

@ -15,8 +15,13 @@ pmd {
isConsoleOutput = true
}
tasks.checkstyleMain {
exclude("**/data/**") // ignore generated proto files
}
tasks.pmdMain {
pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
exclude("**/data/**") // ignore generated proto files
}
tasks.pmdTest {

View File

@ -0,0 +1,35 @@
// Build script for the stand-alone "redaction-service-document" module.
plugins {
    id("com.iqser.red.service.java-conventions")
    id("io.freefair.lombok") version "8.4"
}

description = "redaction-service-document"
group = "com.knecon.fforesight"

val persistenceServiceVersion = "2.612.0-RED10072.1"
// Single place to bump both JUnit artifacts so API and engine cannot drift apart.
val junitVersion = "5.8.1"

dependencies {
    implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}")
    // Declared as `api` so consumers of this module get protobuf on their compile classpath.
    // NOTE(review): presumably required because the generated proto classes are part of this
    // module's public API - confirm before narrowing to `implementation`.
    api("com.google.protobuf:protobuf-java-util:4.28.3")
    testImplementation("org.junit.jupiter:junit-jupiter-api:${junitVersion}")
    testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${junitVersion}")
}

publishing {
    publications {
        // One Maven publication named after the project, built from the standard Java component.
        create<MavenPublication>(name) {
            from(components["java"])
        }
    }
    repositories {
        maven {
            url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
            credentials {
                // Read from Gradle properties; getOrNull() keeps configuration from failing
                // when the properties are not defined.
                username = providers.gradleProperty("mavenUser").getOrNull()
                password = providers.gradleProperty("mavenPassword").getOrNull()
            }
        }
    }
}

View File

@ -1,13 +1,12 @@
package com.iqser.red.service.redaction.v1.server.model.document;
package com.iqser.red.service.redaction.v1.server.data;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto.DocumentStructure;
import static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
import java.io.Serializable;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.AllDocumentPages;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.AllDocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto.AllDocumentTextData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.AllDocumentPages;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -0,0 +1,694 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: DocumentStructure.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class DocumentStructureProto {
private DocumentStructureProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
DocumentStructureProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
public interface DocumentStructureOrBuilder extends
// @@protoc_insertion_point(interface_extends:DocumentStructure)
com.google.protobuf.MessageOrBuilder {
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return Whether the root field is set.
*/
boolean hasRoot();
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return The root.
*/
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot();
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder();
}
/**
* Protobuf type {@code DocumentStructure}
*/
public static final class DocumentStructure extends
com.google.protobuf.GeneratedMessage implements
// @@protoc_insertion_point(message_implements:DocumentStructure)
DocumentStructureOrBuilder {
private static final long serialVersionUID = 0L;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
DocumentStructure.class.getName());
}
// Use DocumentStructure.newBuilder() to construct.
private DocumentStructure(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
super(builder);
}
private DocumentStructure() {
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.class, com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.Builder.class);
}
private int bitField0_;
public static final int ROOT_FIELD_NUMBER = 1;
private com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData root_;
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return Whether the root field is set.
*/
@java.lang.Override
public boolean hasRoot() {
return ((bitField0_ & 0x00000001) != 0);
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return The root.
*/
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot() {
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder() {
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
}
private byte memoizedIsInitialized = -1;
@java.lang.Override
public final boolean isInitialized() {
byte isInitialized = memoizedIsInitialized;
if (isInitialized == 1) return true;
if (isInitialized == 0) return false;
memoizedIsInitialized = 1;
return true;
}
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
if (((bitField0_ & 0x00000001) != 0)) {
output.writeMessage(1, getRoot());
}
getUnknownFields().writeTo(output);
}
@java.lang.Override
public int getSerializedSize() {
int size = memoizedSize;
if (size != -1) return size;
size = 0;
if (((bitField0_ & 0x00000001) != 0)) {
size += com.google.protobuf.CodedOutputStream
.computeMessageSize(1, getRoot());
}
size += getUnknownFields().getSerializedSize();
memoizedSize = size;
return size;
}
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure)) {
return super.equals(obj);
}
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure other = (com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure) obj;
if (hasRoot() != other.hasRoot()) return false;
if (hasRoot()) {
if (!getRoot()
.equals(other.getRoot())) return false;
}
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
return true;
}
@java.lang.Override
public int hashCode() {
if (memoizedHashCode != 0) {
return memoizedHashCode;
}
int hash = 41;
hash = (19 * hash) + getDescriptor().hashCode();
if (hasRoot()) {
hash = (37 * hash) + ROOT_FIELD_NUMBER;
hash = (53 * hash) + getRoot().hashCode();
}
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
@java.lang.Override
public Builder newBuilderForType() { return newBuilder(); }
public static Builder newBuilder() {
return DEFAULT_INSTANCE.toBuilder();
}
public static Builder newBuilder(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure prototype) {
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
}
@java.lang.Override
public Builder toBuilder() {
return this == DEFAULT_INSTANCE
? new Builder() : new Builder().mergeFrom(this);
}
@java.lang.Override
protected Builder newBuilderForType(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
Builder builder = new Builder(parent);
return builder;
}
/**
* Protobuf type {@code DocumentStructure}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessage.Builder<Builder> implements
// @@protoc_insertion_point(builder_implements:DocumentStructure)
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructureOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.class, com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.Builder.class);
}
// Construct using com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.newBuilder()
private Builder() {
maybeForceBuilderInitialization();
}
private Builder(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
super(parent);
maybeForceBuilderInitialization();
}
private void maybeForceBuilderInitialization() {
if (com.google.protobuf.GeneratedMessage
.alwaysUseFieldBuilders) {
getRootFieldBuilder();
}
}
@java.lang.Override
public Builder clear() {
super.clear();
bitField0_ = 0;
root_ = null;
if (rootBuilder_ != null) {
rootBuilder_.dispose();
rootBuilder_ = null;
}
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstanceForType() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.getDefaultInstance();
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure build() {
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure buildPartial() {
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result = new com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure(this);
if (bitField0_ != 0) { buildPartial0(result); }
onBuilt();
return result;
}
private void buildPartial0(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result) {
int from_bitField0_ = bitField0_;
int to_bitField0_ = 0;
if (((from_bitField0_ & 0x00000001) != 0)) {
result.root_ = rootBuilder_ == null
? root_
: rootBuilder_.build();
to_bitField0_ |= 0x00000001;
}
result.bitField0_ |= to_bitField0_;
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure) {
return mergeFrom((com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure)other);
} else {
super.mergeFrom(other);
return this;
}
}
public Builder mergeFrom(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure other) {
if (other == com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.getDefaultInstance()) return this;
if (other.hasRoot()) {
mergeRoot(other.getRoot());
}
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
return true;
}
/**
* Merges fields read from {@code input} into this builder.
*
* Tags are read in a loop until either tag 0 is returned or
* {@code parseUnknownField} reports an end-group tag. Tag 10 is parsed into the
* root field builder; every other tag is handed to {@code parseUnknownField}.
* {@code onChanged()} is always invoked, even when parsing fails.
*
* @param input stream to read tags and field values from
* @param extensionRegistry registry passed on to nested message parsing; must not be null
* @return this builder
* @throws java.lang.NullPointerException if {@code extensionRegistry} is null
* @throws java.io.IOException if reading fails; an InvalidProtocolBufferException is
*         rethrown via {@code unwrapIOException()}
*/
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
// readTag() yields 0 when there is nothing more to read, which ends the loop.
case 0:
done = true;
break;
// 10 == (field number 1 << 3) | wire type 2 (length-delimited): the `root` message field.
case 10: {
input.readMessage(
getRootFieldBuilder().getBuilder(),
extensionRegistry);
// Mark `root` as present.
bitField0_ |= 0x00000001;
break;
} // case 10
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
private int bitField0_;
private com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData root_;
private com.google.protobuf.SingleFieldBuilder<
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder> rootBuilder_;
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return Whether the root field is set.
*/
public boolean hasRoot() {
return ((bitField0_ & 0x00000001) != 0);
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return The root.
*/
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot() {
if (rootBuilder_ == null) {
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
} else {
return rootBuilder_.getMessage();
}
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public Builder setRoot(com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData value) {
if (rootBuilder_ == null) {
if (value == null) {
throw new NullPointerException();
}
root_ = value;
} else {
rootBuilder_.setMessage(value);
}
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public Builder setRoot(
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder builderForValue) {
if (rootBuilder_ == null) {
root_ = builderForValue.build();
} else {
rootBuilder_.setMessage(builderForValue.build());
}
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* Merges {@code value} into the root field, or adopts it when no non-default root is set.
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*
* @param value the message to merge into (or store as) the root
* @return this builder
*/
public Builder mergeRoot(com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData value) {
if (rootBuilder_ == null) {
// No field builder exists yet, so the value lives in the plain root_ field.
if (((bitField0_ & 0x00000001) != 0) &&
root_ != null &&
root_ != com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance()) {
// A non-default root is already present: merge field-by-field through its builder.
getRootBuilder().mergeFrom(value);
} else {
// Nothing to merge with: take the given message as-is.
root_ = value;
}
} else {
// A field builder already owns the root: delegate the merge to it.
rootBuilder_.mergeFrom(value);
}
// root_ is only non-null here on the direct-assignment path; getRootFieldBuilder() nulls
// root_ once a field builder exists, and getRootBuilder() sets the presence bit itself.
if (root_ != null) {
bitField0_ |= 0x00000001;
onChanged();
}
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public Builder clearRoot() {
bitField0_ = (bitField0_ & ~0x00000001);
root_ = null;
if (rootBuilder_ != null) {
rootBuilder_.dispose();
rootBuilder_ = null;
}
onChanged();
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder getRootBuilder() {
bitField0_ |= 0x00000001;
onChanged();
return getRootFieldBuilder().getBuilder();
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder() {
if (rootBuilder_ != null) {
return rootBuilder_.getMessageOrBuilder();
} else {
return root_ == null ?
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
}
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
private com.google.protobuf.SingleFieldBuilder<
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder>
getRootFieldBuilder() {
// Created lazily on first use and then reused for the lifetime of this builder
// (until clear()/clearRoot() dispose of it).
if (rootBuilder_ == null) {
// Seed the field builder with the current root (getRoot() falls back to the default
// instance when root_ is null).
rootBuilder_ = new com.google.protobuf.SingleFieldBuilder<
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder>(
getRoot(),
getParentForChildren(),
isClean());
// From here on the field builder holds the value; drop the plain reference.
root_ = null;
}
return rootBuilder_;
}
// @@protoc_insertion_point(builder_scope:DocumentStructure)
}
// @@protoc_insertion_point(class_scope:DocumentStructure)
private static final com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure();
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstance() {
return DEFAULT_INSTANCE;
}
private static final com.google.protobuf.Parser<DocumentStructure>
PARSER = new com.google.protobuf.AbstractParser<DocumentStructure>() {
@java.lang.Override
public DocumentStructure parsePartialFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
Builder builder = newBuilder();
try {
builder.mergeFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.setUnfinishedMessage(builder.buildPartial());
} catch (com.google.protobuf.UninitializedMessageException e) {
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
} catch (java.io.IOException e) {
throw new com.google.protobuf.InvalidProtocolBufferException(e)
.setUnfinishedMessage(builder.buildPartial());
}
return builder.buildPartial();
}
};
public static com.google.protobuf.Parser<DocumentStructure> parser() {
return PARSER;
}
@java.lang.Override
public com.google.protobuf.Parser<DocumentStructure> getParserForType() {
return PARSER;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_DocumentStructure_descriptor;
private static final
com.google.protobuf.GeneratedMessage.FieldAccessorTable
internal_static_DocumentStructure_fieldAccessorTable;
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\027DocumentStructure.proto\032\017EntryData.pro" +
"to\"-\n\021DocumentStructure\022\030\n\004root\030\001 \001(\0132\n." +
"EntryDataBH\n.com.iqser.red.service.redac" +
"tion.v1.server.dataB\026DocumentStructurePr" +
"otob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.getDescriptor(),
});
internal_static_DocumentStructure_descriptor =
getDescriptor().getMessageTypes().get(0);
internal_static_DocumentStructure_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_DocumentStructure_descriptor,
new java.lang.String[] { "Root", });
descriptor.resolveAllFeaturesImmutable();
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.getDescriptor();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,115 @@
package com.iqser.red.service.redaction.v1.server.data;
import static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
import static com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
import java.awt.geom.Rectangle2D;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Wrapper around a protobuf {@link DocumentStructure} that adds lookup of an entry by its
 * child-index path, a flattened traversal of all entries, and parsers for string-encoded
 * rectangles and vectors.
 *
 * <p>Lombok generates the all-args constructor and the getter for the wrapped structure.
 */
@Getter
@AllArgsConstructor
public class DocumentStructureWrapper implements Serializable {

    /** Number of leading components (x, y, width, height) used to build a rectangle. */
    private static final int RECTANGLE_COMPONENT_COUNT = 4;

    /** Delimiter regex for representation vectors: any run of commas and/or whitespace. */
    private static final String VECTOR_DELIMITER_REGEX = "[,\\s]+";

    private final DocumentStructure documentStructure;

    /**
     * String constants named after table properties.
     * NOTE(review): presumably keys into an entry's property map - confirm against callers.
     */
    public static class TableProperties implements Serializable {

        public static final String NUMBER_OF_ROWS = "numberOfRows";
        public static final String NUMBER_OF_COLS = "numberOfCols";

    }

    /**
     * String constants named after image properties.
     * NOTE(review): presumably keys into an entry's property map - confirm against callers.
     */
    public static class ImageProperties implements Serializable {

        public static final String TRANSPARENT = "transparent";
        public static final String IMAGE_TYPE = "imageType";
        public static final String POSITION = "position";
        public static final String ID = "id";
        public static final String REPRESENTATION_HASH = "representationHash";

    }

    /**
     * String constants named after table-cell properties.
     * NOTE(review): presumably keys into an entry's property map - confirm against callers.
     */
    public static class TableCellProperties implements Serializable {

        public static final String B_BOX = "bBox";
        public static final String ROW = "row";
        public static final String COL = "col";
        public static final String HEADER = "header";

    }

    /**
     * String constants named after duplicate-paragraph properties.
     * NOTE(review): presumably keys into an entry's property map - confirm against callers.
     */
    public static class DuplicateParagraphProperties implements Serializable {

        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";

    }

    /** Separator between the components of a string-encoded rectangle. */
    public static final String RECTANGLE_DELIMITER = ";";


    /**
     * Parses a rectangle encoded as float values separated by {@link #RECTANGLE_DELIMITER}.
     *
     * <p>The first four values are used, in order, as x, y, width and height. Additional
     * values are parsed (and therefore must be valid floats) but otherwise ignored.
     *
     * @param bBox the encoded rectangle, e.g. {@code "1.0;2.0;30.5;40"}
     * @return a {@link Rectangle2D.Float} built from the first four values
     * @throws NumberFormatException if any component is not a parsable float
     * @throws IllegalArgumentException if fewer than four components are present
     * @throws NullPointerException if {@code bBox} is null
     */
    public static Rectangle2D parseRectangle2D(String bBox) {

        String[] components = bBox.split(RECTANGLE_DELIMITER);
        // Parse into a primitive array: avoids boxing every component into a List<Float>.
        float[] values = new float[components.length];
        for (int i = 0; i < components.length; i++) {
            values[i] = Float.parseFloat(components[i]);
        }
        // Fail with a descriptive message instead of an index error on short input.
        if (values.length < RECTANGLE_COMPONENT_COUNT) {
            throw new IllegalArgumentException("Expected at least " + RECTANGLE_COMPONENT_COUNT
                    + " values separated by '" + RECTANGLE_DELIMITER + "' but got " + values.length
                    + ": \"" + bBox + "\"");
        }
        return new Rectangle2D.Float(values[0], values[1], values[2], values[3]);
    }


    /**
     * Parses a vector of doubles separated by commas and/or whitespace.
     *
     * <p>Leading, trailing and repeated delimiters are tolerated; a blank input yields an
     * empty array.
     *
     * @param representationHash the encoded vector, e.g. {@code "0.1, 0.2 0.3"}
     * @return the parsed values in input order
     * @throws NumberFormatException if a token is not a parsable double
     * @throws NullPointerException if {@code representationHash} is null
     */
    public static double[] parseRepresentationVector(String representationHash) {

        return Arrays.stream(representationHash.split(VECTOR_DELIMITER_REGEX))
                // split() keeps one empty leading token when the input starts with a delimiter
                // (or is empty); skip it so such input is handled like trailing delimiters.
                .filter(token -> !token.isEmpty())
                .mapToDouble(Double::parseDouble)
                .toArray();
    }


    /**
     * Resolves an entry by walking child indices starting at the root.
     *
     * @param tocId child index to follow at each level; an empty list selects the root
     * @return the entry reached after following every index
     * @throws IndexOutOfBoundsException if an index does not address an existing child
     */
    public EntryData get(List<Integer> tocId) {

        EntryData entry = documentStructure.getRoot();
        for (int childIndex : tocId) {
            entry = entry.getChildrenList()
                    .get(childIndex);
        }
        return entry;
    }


    /**
     * Streams the root and all of its descendants, each parent before its children
     * (depth-first, pre-order).
     *
     * @return a stream over every entry of the wrapped structure
     */
    public Stream<EntryData> streamAllEntries() {

        return flatten(documentStructure.getRoot());
    }


    /**
     * Renders every entry with its own {@code toString()}, joined by newlines in
     * {@link #streamAllEntries()} order.
     */
    @Override
    public String toString() {

        return String.join("\n",
                           streamAllEntries().map(EntryData::toString)
                                   .toList());
    }


    /** Recursively emits {@code entry} followed by the flattened subtrees of its children. */
    private static Stream<EntryData> flatten(EntryData entry) {

        return Stream.concat(Stream.of(entry),
                             entry.getChildrenList()
                                     .stream()
                                     .flatMap(DocumentStructureWrapper::flatten));
    }

}

View File

@ -0,0 +1,176 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: LayoutEngine.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class LayoutEngineProto {
private LayoutEngineProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
LayoutEngineProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code LayoutEngine}
*/
public enum LayoutEngine
implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>ALGORITHM = 0;</code>
*/
ALGORITHM(0),
/**
* <code>AI = 1;</code>
*/
AI(1),
/**
* <code>OUTLINE = 2;</code>
*/
OUTLINE(2),
UNRECOGNIZED(-1),
;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
LayoutEngine.class.getName());
}
/**
* <code>ALGORITHM = 0;</code>
*/
public static final int ALGORITHM_VALUE = 0;
/**
* <code>AI = 1;</code>
*/
public static final int AI_VALUE = 1;
/**
* <code>OUTLINE = 2;</code>
*/
public static final int OUTLINE_VALUE = 2;
/**
* Returns the numeric value this constant was constructed with.
*
* @return the stored numeric value
* @throws java.lang.IllegalArgumentException if called on {@code UNRECOGNIZED}
*/
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalArgumentException(
"Can't get the number of an unknown enum value.");
}
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@java.lang.Deprecated
public static LayoutEngine valueOf(int value) {
return forNumber(value);
}
/**
* Maps a numeric wire value to its enum constant.
*
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value, or {@code null}
*         if the value is none of 0, 1 or 2 ({@code UNRECOGNIZED} is never returned here).
*/
public static LayoutEngine forNumber(int value) {
switch (value) {
case 0: return ALGORITHM;
case 1: return AI;
case 2: return OUTLINE;
default: return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
LayoutEngine> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
public LayoutEngine findValueByNumber(int number) {
return LayoutEngine.forNumber(number);
}
};
/**
* Returns the descriptor entry for this constant.
*
* @return the value descriptor found at this constant's {@code ordinal()} position
* @throws java.lang.IllegalStateException if called on {@code UNRECOGNIZED}
*/
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalStateException(
"Can't get the descriptor of an unrecognized enum value.");
}
// Looks the descriptor up by declaration position, not by numeric value.
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto.getDescriptor().getEnumTypes().get(0);
}
private static final LayoutEngine[] VALUES = values();
/**
 * Maps an enum value descriptor back to the Java enum constant.
 *
 * @param desc a value descriptor whose type must be this enum's descriptor
 * @return {@code UNRECOGNIZED} when the descriptor's index is -1, otherwise the
 *     constant stored at that index in {@code VALUES}
 * @throws java.lang.IllegalArgumentException if {@code desc} belongs to a different enum type
 */
public static LayoutEngine valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
// Reference comparison: the descriptor must be the very instance returned by getDescriptor().
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private LayoutEngine(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:LayoutEngine)
}
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
// Builds the file descriptor for LayoutEngine.proto once, when the outer class is initialized.
static {
// Embedded descriptor data emitted by protoc (escaped bytes; names LayoutEngine.proto,
// the enum LayoutEngine and its values ALGORITHM, AI and OUTLINE). Must not be edited by hand.
java.lang.String[] descriptorData = {
"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" +
"ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002BC\n.com." +
"iqser.red.service.redaction.v1.server.da" +
"taB\021LayoutEngineProtob\006proto3"
};
// The empty array is the list of file descriptors this file depends on: none.
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,261 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: NodeType.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class NodeTypeProto {
private NodeTypeProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
NodeTypeProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code NodeType}
*/
public enum NodeType
implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>DOCUMENT = 0;</code>
*/
DOCUMENT(0),
/**
* <code>SECTION = 1;</code>
*/
SECTION(1),
/**
* <code>SUPER_SECTION = 2;</code>
*/
SUPER_SECTION(2),
/**
* <code>HEADLINE = 3;</code>
*/
HEADLINE(3),
/**
* <code>PARAGRAPH = 4;</code>
*/
PARAGRAPH(4),
/**
* <code>TABLE = 5;</code>
*/
TABLE(5),
/**
* <code>TABLE_CELL = 6;</code>
*/
TABLE_CELL(6),
/**
* <code>IMAGE = 7;</code>
*/
IMAGE(7),
/**
* <code>HEADER = 8;</code>
*/
HEADER(8),
/**
* <code>FOOTER = 9;</code>
*/
FOOTER(9),
/**
* <code>TABLE_OF_CONTENTS = 10;</code>
*/
TABLE_OF_CONTENTS(10),
/**
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
*/
TABLE_OF_CONTENTS_ITEM(11),
UNRECOGNIZED(-1),
;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
NodeType.class.getName());
}
/**
* <code>DOCUMENT = 0;</code>
*/
public static final int DOCUMENT_VALUE = 0;
/**
* <code>SECTION = 1;</code>
*/
public static final int SECTION_VALUE = 1;
/**
* <code>SUPER_SECTION = 2;</code>
*/
public static final int SUPER_SECTION_VALUE = 2;
/**
* <code>HEADLINE = 3;</code>
*/
public static final int HEADLINE_VALUE = 3;
/**
* <code>PARAGRAPH = 4;</code>
*/
public static final int PARAGRAPH_VALUE = 4;
/**
* <code>TABLE = 5;</code>
*/
public static final int TABLE_VALUE = 5;
/**
* <code>TABLE_CELL = 6;</code>
*/
public static final int TABLE_CELL_VALUE = 6;
/**
* <code>IMAGE = 7;</code>
*/
public static final int IMAGE_VALUE = 7;
/**
* <code>HEADER = 8;</code>
*/
public static final int HEADER_VALUE = 8;
/**
* <code>FOOTER = 9;</code>
*/
public static final int FOOTER_VALUE = 9;
/**
* <code>TABLE_OF_CONTENTS = 10;</code>
*/
public static final int TABLE_OF_CONTENTS_VALUE = 10;
/**
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
*/
public static final int TABLE_OF_CONTENTS_ITEM_VALUE = 11;
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalArgumentException(
"Can't get the number of an unknown enum value.");
}
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@java.lang.Deprecated
public static NodeType valueOf(int value) {
return forNumber(value);
}
/**
 * Resolves a numeric wire value to the matching {@code NodeType} constant.
 *
 * @param value The numeric wire value of the corresponding enum entry.
 * @return The enum associated with the given numeric wire value, or {@code null}
 *     when the value is outside 0..11. This method never returns
 *     {@code UNRECOGNIZED}; callers must handle the {@code null} case.
 */
public static NodeType forNumber(int value) {
switch (value) {
case 0: return DOCUMENT;
case 1: return SECTION;
case 2: return SUPER_SECTION;
case 3: return HEADLINE;
case 4: return PARAGRAPH;
case 5: return TABLE;
case 6: return TABLE_CELL;
case 7: return IMAGE;
case 8: return HEADER;
case 9: return FOOTER;
case 10: return TABLE_OF_CONTENTS;
case 11: return TABLE_OF_CONTENTS_ITEM;
default: return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap<NodeType>
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
NodeType> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
public NodeType findValueByNumber(int number) {
return NodeType.forNumber(number);
}
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalStateException(
"Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.NodeTypeProto.getDescriptor().getEnumTypes().get(0);
}
private static final NodeType[] VALUES = values();
/**
 * Maps an enum value descriptor back to the Java {@code NodeType} constant.
 *
 * @param desc a value descriptor whose type must be this enum's descriptor
 * @return {@code UNRECOGNIZED} when the descriptor's index is -1, otherwise the
 *     constant stored at that index in {@code VALUES}
 * @throws java.lang.IllegalArgumentException if {@code desc} belongs to a different enum type
 */
public static NodeType valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
// Reference comparison: the descriptor must be the very instance returned by getDescriptor().
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private NodeType(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:NodeType)
}
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\016NodeType.proto*\306\001\n\010NodeType\022\014\n\010DOCUMEN" +
"T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H" +
"EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT" +
"ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO" +
"OTER\020\t\022\025\n\021TABLE_OF_CONTENTS\020\n\022\032\n\026TABLE_O" +
"F_CONTENTS_ITEM\020\013B?\n.com.iqser.red.servi" +
"ce.redaction.v1.server.dataB\rNodeTypePro" +
"tob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,606 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: Range.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class RangeProto {
private RangeProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
RangeProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
public interface RangeOrBuilder extends
// @@protoc_insertion_point(interface_extends:Range)
com.google.protobuf.MessageOrBuilder {
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return The start.
*/
int getStart();
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return The end.
*/
int getEnd();
}
/**
* Protobuf type {@code Range}
*/
public static final class Range extends
com.google.protobuf.GeneratedMessage implements
// @@protoc_insertion_point(message_implements:Range)
RangeOrBuilder {
private static final long serialVersionUID = 0L;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
Range.class.getName());
}
// Use Range.newBuilder() to construct.
private Range(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
super(builder);
}
private Range() {
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.class, com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.Builder.class);
}
public static final int START_FIELD_NUMBER = 1;
private int start_ = 0;
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return The start.
*/
@java.lang.Override
public int getStart() {
return start_;
}
public static final int END_FIELD_NUMBER = 2;
private int end_ = 0;
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return The end.
*/
@java.lang.Override
public int getEnd() {
return end_;
}
private byte memoizedIsInitialized = -1;
@java.lang.Override
public final boolean isInitialized() {
byte isInitialized = memoizedIsInitialized;
if (isInitialized == 1) return true;
if (isInitialized == 0) return false;
memoizedIsInitialized = 1;
return true;
}
/**
 * Serializes this message to {@code output}.
 *
 * <p>{@code start} (field 1) and {@code end} (field 2) are written only when they are
 * non-zero; any unknown fields held by this message are written afterwards.
 *
 * @param output the stream to write to
 * @throws java.io.IOException if writing to {@code output} fails
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
if (start_ != 0) {
output.writeInt32(1, start_);
}
if (end_ != 0) {
output.writeInt32(2, end_);
}
getUnknownFields().writeTo(output);
}
/**
 * Computes the size of this message as the sum of the sizes of the non-zero fields and the unknown fields.
 *
 * <p>The result is cached in {@code memoizedSize} (declared outside this block); a cached
 * value of -1 is treated as "not computed yet".
 *
 * @return the computed (or previously cached) size
 */
@java.lang.Override
public int getSerializedSize() {
int size = memoizedSize;
if (size != -1) return size;
size = 0;
// Mirrors writeTo: zero-valued fields contribute nothing.
if (start_ != 0) {
size += com.google.protobuf.CodedOutputStream
.computeInt32Size(1, start_);
}
if (end_ != 0) {
size += com.google.protobuf.CodedOutputStream
.computeInt32Size(2, end_);
}
size += getUnknownFields().getSerializedSize();
memoizedSize = size;
return size;
}
/**
 * Compares this message with another object.
 *
 * <p>Two {@code Range} messages are equal when {@code start}, {@code end} and their
 * unknown fields are equal. For any object that is not a {@code Range} the decision is
 * delegated to {@code super.equals}.
 *
 * @param obj the object to compare with
 * @return {@code true} if the objects are considered equal
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof com.iqser.red.service.redaction.v1.server.data.RangeProto.Range)) {
return super.equals(obj);
}
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range other = (com.iqser.red.service.redaction.v1.server.data.RangeProto.Range) obj;
if (getStart()
!= other.getStart()) return false;
if (getEnd()
!= other.getEnd()) return false;
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
return true;
}
/**
 * Computes a hash from the message descriptor, both field numbers and values, and the unknown fields.
 *
 * <p>The result is cached in {@code memoizedHashCode} (declared outside this block). A cached
 * value of 0 is treated as "not computed yet", so a hash that happens to be 0 is recomputed
 * on every call.
 *
 * @return the computed (or previously cached) hash code
 */
@java.lang.Override
public int hashCode() {
if (memoizedHashCode != 0) {
return memoizedHashCode;
}
int hash = 41;
hash = (19 * hash) + getDescriptor().hashCode();
hash = (37 * hash) + START_FIELD_NUMBER;
hash = (53 * hash) + getStart();
hash = (37 * hash) + END_FIELD_NUMBER;
hash = (53 * hash) + getEnd();
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
@java.lang.Override
public Builder newBuilderForType() { return newBuilder(); }
public static Builder newBuilder() {
return DEFAULT_INSTANCE.toBuilder();
}
public static Builder newBuilder(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range prototype) {
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
}
@java.lang.Override
public Builder toBuilder() {
return this == DEFAULT_INSTANCE
? new Builder() : new Builder().mergeFrom(this);
}
@java.lang.Override
protected Builder newBuilderForType(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
Builder builder = new Builder(parent);
return builder;
}
/**
* Protobuf type {@code Range}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessage.Builder<Builder> implements
// @@protoc_insertion_point(builder_implements:Range)
com.iqser.red.service.redaction.v1.server.data.RangeProto.RangeOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.class, com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.Builder.class);
}
// Construct using com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.newBuilder()
private Builder() {
}
private Builder(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
super(parent);
}
@java.lang.Override
public Builder clear() {
super.clear();
bitField0_ = 0;
start_ = 0;
end_ = 0;
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstanceForType() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.getDefaultInstance();
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range build() {
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range buildPartial() {
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result = new com.iqser.red.service.redaction.v1.server.data.RangeProto.Range(this);
if (bitField0_ != 0) { buildPartial0(result); }
onBuilt();
return result;
}
/**
 * Copies the builder's field values into {@code result}, but only for fields whose bit is set in {@code bitField0_}.
 *
 * <p>Bit {@code 0x00000001} tracks {@code start} and bit {@code 0x00000002} tracks
 * {@code end}; the setters and the stream parser of this builder set these bits.
 *
 * @param result the message instance being populated
 */
private void buildPartial0(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result) {
int from_bitField0_ = bitField0_;
if (((from_bitField0_ & 0x00000001) != 0)) {
result.start_ = start_;
}
if (((from_bitField0_ & 0x00000002) != 0)) {
result.end_ = end_;
}
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof com.iqser.red.service.redaction.v1.server.data.RangeProto.Range) {
return mergeFrom((com.iqser.red.service.redaction.v1.server.data.RangeProto.Range)other);
} else {
super.mergeFrom(other);
return this;
}
}
/**
 * Merges the contents of another {@code Range} into this builder.
 *
 * <p>Merging the default instance is a no-op. Otherwise each non-zero field of
 * {@code other} overwrites the builder's value, and the unknown fields are merged.
 * A zero value in {@code other} never overwrites the builder's value.
 *
 * @param other the message to merge from
 * @return this builder for chaining
 */
public Builder mergeFrom(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range other) {
// Identity check against the shared default instance: nothing to merge.
if (other == com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.getDefaultInstance()) return this;
if (other.getStart() != 0) {
setStart(other.getStart());
}
if (other.getEnd() != 0) {
setEnd(other.getEnd());
}
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
return true;
}
/**
 * Reads tags from {@code input} and merges the decoded fields into this builder.
 *
 * <p>Tag 8 is decoded into {@code start} and tag 16 into {@code end}; every other
 * non-zero tag is handed to {@code parseUnknownField}. {@code onChanged()} is invoked
 * in the {@code finally} block, so it also runs when parsing fails.
 *
 * @param input the stream to read from
 * @param extensionRegistry must not be {@code null}
 * @return this builder for chaining
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is {@code null}
 * @throws java.io.IOException if reading fails; an {@code InvalidProtocolBufferException}
 *     is rethrown via {@code unwrapIOException()}
 */
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
// A tag of 0 ends the loop (readTag reports 0 when there is nothing left to read).
case 0:
done = true;
break;
// 8 == (1 << 3) | 0: field number 1 (start) with varint wire type.
case 8: {
start_ = input.readInt32();
bitField0_ |= 0x00000001;
break;
} // case 8
// 16 == (2 << 3) | 0: field number 2 (end) with varint wire type.
case 16: {
end_ = input.readInt32();
bitField0_ |= 0x00000002;
break;
} // case 16
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
private int bitField0_;
private int start_ ;
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return The start.
*/
@java.lang.Override
public int getStart() {
return start_;
}
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @param value The start to set.
* @return This builder for chaining.
*/
public Builder setStart(int value) {
start_ = value;
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return This builder for chaining.
*/
public Builder clearStart() {
bitField0_ = (bitField0_ & ~0x00000001);
start_ = 0;
onChanged();
return this;
}
private int end_ ;
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return The end.
*/
@java.lang.Override
public int getEnd() {
return end_;
}
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @param value The end to set.
* @return This builder for chaining.
*/
public Builder setEnd(int value) {
end_ = value;
bitField0_ |= 0x00000002;
onChanged();
return this;
}
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return This builder for chaining.
*/
public Builder clearEnd() {
bitField0_ = (bitField0_ & ~0x00000002);
end_ = 0;
onChanged();
return this;
}
// @@protoc_insertion_point(builder_scope:Range)
}
// @@protoc_insertion_point(class_scope:Range)
private static final com.iqser.red.service.redaction.v1.server.data.RangeProto.Range DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new com.iqser.red.service.redaction.v1.server.data.RangeProto.Range();
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstance() {
return DEFAULT_INSTANCE;
}
/**
 * Shared parser for {@code Range} messages.
 *
 * <p>Parsing goes through a fresh builder. When parsing fails, the partially built
 * message is attached to the thrown {@code InvalidProtocolBufferException} through
 * {@code setUnfinishedMessage}; other exception types are converted first.
 */
private static final com.google.protobuf.Parser<Range>
PARSER = new com.google.protobuf.AbstractParser<Range>() {
@java.lang.Override
public Range parsePartialFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
Builder builder = newBuilder();
try {
builder.mergeFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.setUnfinishedMessage(builder.buildPartial());
} catch (com.google.protobuf.UninitializedMessageException e) {
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
} catch (java.io.IOException e) {
// Wrap plain I/O failures so callers only see InvalidProtocolBufferException.
throw new com.google.protobuf.InvalidProtocolBufferException(e)
.setUnfinishedMessage(builder.buildPartial());
}
return builder.buildPartial();
}
};
public static com.google.protobuf.Parser<Range> parser() {
return PARSER;
}
@java.lang.Override
public com.google.protobuf.Parser<Range> getParserForType() {
return PARSER;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_Range_descriptor;
private static final
com.google.protobuf.GeneratedMessage.FieldAccessorTable
internal_static_Range_fieldAccessorTable;
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
// Builds the file descriptor for Range.proto and the reflection tables for the Range message
// once, when the outer class is initialized.
static {
// Embedded descriptor data emitted by protoc (escaped bytes; names Range.proto, the message
// Range and its fields start and end). Must not be edited by hand.
java.lang.String[] descriptorData = {
"\n\013Range.proto\"#\n\005Range\022\r\n\005start\030\001 \001(\005\022\013\n" +
"\003end\030\002 \001(\005B<\n.com.iqser.red.service.reda" +
"ction.v1.server.dataB\nRangeProtob\006proto3"
};
// The empty array is the list of file descriptors this file depends on: none.
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
// Range is the first (index 0) message type of the file.
internal_static_Range_descriptor =
getDescriptor().getMessageTypes().get(0);
internal_static_Range_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_Range_descriptor,
new java.lang.String[] { "Start", "End", });
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable page record kept in the {@code data.old} package.
 *
 * <p>Lombok generates the accessors, builder and constructors; {@code @FieldDefaults}
 * makes all fields private. The class contains no logic of its own.
 *
 * @deprecated NOTE(review): presumably superseded by the protobuf message
 *     {@code DocumentPageProto.DocumentPage} — confirm before removing usages.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentPage implements Serializable {
// Page number. NOTE(review): whether numbering is 0- or 1-based is not visible here.
int number;
// Page height. NOTE(review): unit not visible here — confirm.
int height;
// Page width. NOTE(review): unit not visible here — confirm.
int width;
// Page rotation. NOTE(review): presumably degrees — confirm.
int rotation;
}

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable holder for position data kept in the {@code data.old} package.
 *
 * <p>Lombok generates the accessors, builder and constructors; {@code @FieldDefaults}
 * makes all fields private. The class contains no logic of its own.
 *
 * @deprecated NOTE(review): presumably superseded by the protobuf message
 *     {@code DocumentPositionDataProto.DocumentPositionData} — confirm before removing usages.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentPositionData implements Serializable {
// Identifier of this record. NOTE(review): presumably the id of the matching text block — confirm.
Long id;
// NOTE(review): judging by the name, maps a string index to an index into positions — confirm.
int[] stringIdxToPositionIdx;
// One float array per position. NOTE(review): the layout of the inner array is not visible here.
float[][] positions;
}

View File

@ -0,0 +1,158 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.awt.geom.Rectangle2D;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable representation of a document's structure tree.
 *
 * <p>The tree consists of {@link EntryData} nodes hanging off {@link #root}. The nested
 * {@code *Properties} classes only hold the keys used in {@link EntryData#getProperties()}.
 * Lombok generates accessors, builder and constructors; {@code @FieldDefaults} makes the
 * instance fields private.
 *
 * @deprecated NOTE(review): presumably superseded by the protobuf message
 *     {@code DocumentStructureProto.DocumentStructure} — confirm before removing usages.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentStructure implements Serializable {

    /** Root entry of the tree; addressed by the empty tree id in {@link #get(List)}. */
    EntryData root;

    /** Property keys used for table entries. */
    public static class TableProperties implements Serializable {

        public static final String NUMBER_OF_ROWS = "numberOfRows";
        public static final String NUMBER_OF_COLS = "numberOfCols";

    }

    /** Property keys used for image entries. */
    public static class ImageProperties implements Serializable {

        public static final String TRANSPARENT = "transparent";
        public static final String IMAGE_TYPE = "imageType";
        public static final String POSITION = "position";
        public static final String ID = "id";
        public static final String REPRESENTATION_HASH = "representationHash";

    }

    /** Property keys used for table cell entries. */
    public static class TableCellProperties implements Serializable {

        public static final String B_BOX = "bBox";
        public static final String ROW = "row";
        public static final String COL = "col";
        public static final String HEADER = "header";

    }

    /** Property keys used for duplicated paragraph entries. */
    public static class DuplicateParagraphProperties implements Serializable {

        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";

    }

    /** Delimiter between the numeric components of a serialized rectangle. */
    public static final String RECTANGLE_DELIMITER = ";";


    /**
     * Parses a rectangle from its {@link #RECTANGLE_DELIMITER}-separated string form.
     *
     * <p>The first four components are passed, in order, to
     * {@code new Rectangle2D.Float(x, y, w, h)}; additional components are ignored.
     *
     * @param bBox the serialized rectangle, e.g. {@code "1;2;3;4"}
     * @return the parsed rectangle
     * @throws NumberFormatException if a component is not a valid float
     * @throws IndexOutOfBoundsException if fewer than four components are present
     */
    public static Rectangle2D parseRectangle2D(String bBox) {

        List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
                .map(Float::parseFloat)
                .toList();
        return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
    }


    /**
     * Parses a list of doubles separated by commas and/or whitespace.
     *
     * @param representationHash the serialized values, e.g. {@code "0.1, 0.2 0.3"}
     * @return the parsed values in input order
     * @throws NumberFormatException if a token is not a valid double; this includes the empty
     *     token produced by an empty input or by a leading separator
     */
    public static double[] parseRepresentationVector(String representationHash) {

        String[] stringArray = representationHash.split("[,\\s]+");
        double[] doubleArray = new double[stringArray.length];
        for (int i = 0; i < stringArray.length; i++) {
            doubleArray[i] = Double.parseDouble(stringArray[i]);
        }

        return doubleArray;
    }


    /**
     * Resolves a tree id, i.e. the list of child indices leading from the root to an entry.
     *
     * @param tocId child indices, outermost first; an empty list addresses the root
     * @return the entry reached by following the indices
     */
    public EntryData get(List<Integer> tocId) {

        EntryData entry = root;
        for (int id : tocId) {
            entry = entry.children.get(id);
        }
        return entry;
    }


    /**
     * Streams every entry of the tree exactly once, in depth-first pre-order starting at the root.
     *
     * <p>The previous implementation flattened the root and then flattened each of the root's
     * direct children again, which emitted every non-root entry at least twice.
     *
     * @return a stream over the root and all of its descendants
     */
    public Stream<EntryData> streamAllEntries() {

        return flatten(root);
    }


    /**
     * Renders one line per entry, using {@link EntryData#toString()}, in the order of
     * {@link #streamAllEntries()}.
     */
    @Override
    public String toString() {

        return String.join("\n",
                           streamAllEntries().map(EntryData::toString)
                                   .toList());
    }


    /** Returns {@code entry} followed by all of its descendants in depth-first pre-order. */
    private static Stream<EntryData> flatten(EntryData entry) {

        // An entry without a children list (e.g. created through the builder) is treated as a leaf.
        if (entry.children == null) {
            return Stream.of(entry);
        }
        return Stream.concat(Stream.of(entry),
                             entry.children.stream()
                                     .flatMap(DocumentStructure::flatten));
    }


    /** A single node of the structure tree. */
    @Data
    @Builder
    @NoArgsConstructor
    @AllArgsConstructor
    @FieldDefaults(level = AccessLevel.PRIVATE)
    public static class EntryData implements Serializable {

        NodeType type;
        // Child indices leading from the root to this entry; rendered by toString().
        int[] treeId;
        Long[] atomicBlockIds;
        Long[] pageNumbers;
        Map<String, String> properties;
        List<EntryData> children;
        Set<LayoutEngine> engines;


        /**
         * Renders the entry as {@code [treeId]: type atbs = n}, e.g. {@code [0,1]: Paragraph atbs = 3}.
         *
         * <p>An empty tree id (the root) is rendered as {@code []}. The previous implementation
         * always deleted the last character and thereby removed the opening bracket in that case.
         * A missing tree id is rendered as {@code []} and a missing id array as a count of 0.
         */
        @Override
        public String toString() {

            StringBuilder sb = new StringBuilder("[");
            if (treeId != null) {
                for (int i = 0; i < treeId.length; i++) {
                    if (i > 0) {
                        sb.append(",");
                    }
                    sb.append(treeId[i]);
                }
            }
            sb.append("]: ");
            sb.append(type);
            sb.append(" atbs = ");
            sb.append(atomicBlockIds == null ? 0 : atomicBlockIds.length);

            return sb.toString();
        }

    }

}

View File

@ -0,0 +1,28 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable holder for text data kept in the {@code data.old} package.
 *
 * <p>Lombok generates the accessors, builder and constructors; {@code @FieldDefaults}
 * makes all fields private. The class contains no logic of its own.
 *
 * @deprecated NOTE(review): presumably superseded by the protobuf message
 *     {@code DocumentTextDataProto.DocumentTextData} — confirm before removing usages.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentTextData implements Serializable {
// Identifier of this text record.
Long id;
// Page this text belongs to. NOTE(review): presumably a page number — confirm.
Long page;
String searchText;
// NOTE(review): presumably the position of this block within its page — confirm.
int numberOnPage;
// NOTE(review): start/end presumably delimit this block's range in the document text;
// whether end is exclusive is not visible here.
int start;
int end;
// NOTE(review): presumably string indices of line breaks; the reference point is not visible here.
int[] lineBreaks;
}

View File

@ -0,0 +1,8 @@
package com.iqser.red.service.redaction.v1.server.data.old;
/**
 * Legacy enum of layout engines, referenced by {@code DocumentStructure.EntryData.engines}
 * in this package.
 *
 * <p>The constant names and their order match the values 0, 1 and 2 of the generated
 * protobuf enum {@code LayoutEngineProto.LayoutEngine}. Do not reorder the constants:
 * ordinal and name are part of the serialized form.
 *
 * @deprecated NOTE(review): presumably superseded by {@code LayoutEngineProto.LayoutEngine} — confirm.
 */
@Deprecated
public enum LayoutEngine {
ALGORITHM,
AI,
OUTLINE
}

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import java.util.Locale;
/**
 * Legacy enum of document node types.
 *
 * <p>{@link #toString()} renders a constant with its first character unchanged and the
 * remainder lower-cased, e.g. {@code SUPER_SECTION} becomes {@code Super_section}.
 * Underscores are kept.
 */
@Deprecated
public enum NodeType implements Serializable {
    DOCUMENT,
    SECTION,
    SUPER_SECTION,
    HEADLINE,
    PARAGRAPH,
    TABLE,
    TABLE_CELL,
    IMAGE,
    HEADER,
    FOOTER;


    /**
     * Returns the constant name with every character after the first lower-cased.
     * Lower-casing uses {@link Locale#ROOT}, so the result is locale-independent.
     */
    @Override
    public String toString() {

        String constantName = name();
        String head = constantName.substring(0, 1);
        String tail = constantName.substring(1).toLowerCase(Locale.ROOT);
        return head + tail;
    }
}

View File

@ -0,0 +1,197 @@
package com.iqser.red.service.redaction.v1.server.mapper;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.AllDocumentPages;
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.DocumentPage;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData.Position;
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureWrapper;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.DocumentTextData;
import com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
import com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto;
import com.iqser.red.service.redaction.v1.server.data.NodeTypeProto;
import com.iqser.red.service.redaction.v1.server.data.RangeProto;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.experimental.UtilityClass;
@UtilityClass
public class DocumentDataMapper {
/**
 * Converts a {@link Document} into its serializable {@link DocumentData} representation.
 * <p>
 * The distinct atomic text blocks of all terminal text blocks are collected once, in document order.
 * Both the text data and the position data are mapped from that single list, so the two outputs are
 * built from the same blocks in the same order.
 *
 * @param document the document to serialize
 * @return the text, position, page and structure data of the document
 */
public DocumentData toDocumentData(Document document) {

    // Single traversal instead of running the identical stream/flatMap/distinct pipeline twice.
    List<AtomicTextBlock> atomicTextBlocks = document.streamTerminalTextBlocksInOrder()
            .flatMap(textBlock -> textBlock.getAtomicTextBlocks()
                    .stream())
            .distinct()
            .toList();

    List<DocumentTextData> documentTextData = atomicTextBlocks.stream()
            .map(DocumentDataMapper::toAtomicTextBlockData)
            .toList();
    AllDocumentTextData allDocumentTextData = AllDocumentTextData.newBuilder().addAllDocumentTextData(documentTextData).build();

    List<DocumentPositionData> atomicPositionBlockData = atomicTextBlocks.stream()
            .map(DocumentDataMapper::toAtomicPositionBlockData)
            .toList();
    AllDocumentPositionData allDocumentPositionData = AllDocumentPositionData.newBuilder().addAllDocumentPositionData(atomicPositionBlockData).build();

    List<DocumentPage> documentPageData = document.getPages()
            .stream()
            .map(DocumentDataMapper::toPageData)
            .toList();
    AllDocumentPages allDocumentPages = AllDocumentPages.newBuilder().addAllDocumentPages(documentPageData).build();

    DocumentStructureWrapper tableOfContentsData = toDocumentTreeData(document.getDocumentTree());

    return DocumentData.builder()
            .documentTextData(allDocumentTextData)
            .documentPositionData(allDocumentPositionData)
            .documentPages(allDocumentPages)
            .documentStructureWrapper(tableOfContentsData)
            .build();
}
/**
 * Serializes the given document tree, starting at its root entry, and wraps the resulting structure.
 *
 * @param documentTree the tree to serialize
 * @return a wrapper around the serialized structure
 */
private DocumentStructureWrapper toDocumentTreeData(DocumentTree documentTree) {

    EntryData rootData = toEntryData(documentTree.getRoot());
    DocumentStructure structure = DocumentStructure.newBuilder().setRoot(rootData).build();
    return new DocumentStructureWrapper(structure);
}
/**
 * Recursively converts a {@link DocumentTree.Entry} and all of its children into an {@link EntryData}.
 * <p>
 * Leaf nodes contribute the ids of their atomic text blocks. Tables, table cells, images and duplicated
 * paragraphs additionally get their type-specific properties written into the properties map.
 * An entry without a node is written with no text blocks, no page numbers, no properties and the
 * {@code ALGORITHM} layout engine as fallback.
 *
 * @param entry the tree entry to convert
 * @return the serialized entry including all of its descendants
 */
private EntryData toEntryData(DocumentTree.Entry entry) {

    // The engine fallback below exists for entries without a node, so every earlier access has to be
    // guarded as well; previously the node was dereferenced unconditionally and the fallback was unreachable.
    // NOTE(review): entry.getType() is still called for node-less entries - confirm it does not delegate to the node.
    var node = entry.getNode();

    List<Long> atomicTextBlocks = new ArrayList<>();
    if (node != null && node.isLeaf()) {
        atomicTextBlocks = toAtomicTextBlockIds(node.getLeafTextBlock());
    }

    Map<String, String> properties;
    if (node == null) {
        properties = new HashMap<>();
    } else {
        properties = switch (entry.getType()) {
            case TABLE -> PropertiesMapper.buildTableProperties((Table) node);
            case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCell) node);
            case IMAGE -> PropertiesMapper.buildImageProperties((Image) node);
            case PARAGRAPH ->
                    node instanceof DuplicatedParagraph duplicatedParagraph ? PropertiesMapper.buildDuplicateParagraphProperties(duplicatedParagraph) : new HashMap<>();
            default -> new HashMap<>();
        };
    }

    List<Long> pageNumbers = new ArrayList<>();
    if (node != null) {
        node.getPages()
                .stream()
                .map(Page::getNumber)
                .map(Integer::longValue)
                .forEach(pageNumbers::add);
    }

    var documentBuilder = EntryData.newBuilder()
            .addAllTreeId(entry.getTreeId())
            .addAllChildren(entry.getChildren()
                                    .stream()
                                    .map(DocumentDataMapper::toEntryData)
                                    .toList())
            .setType(resolveType(entry.getType()))
            .addAllAtomicBlockIds(atomicTextBlocks)
            .addAllPageNumbers(pageNumbers)
            .putAllProperties(properties);

    if (node != null) {
        // Engines are mapped to the proto enum by constant name.
        documentBuilder.addAllEngines(node.getEngines()
                                              .stream()
                                              .map(engine -> LayoutEngineProto.LayoutEngine.valueOf(engine.name()))
                                              .toList());
    } else {
        documentBuilder.addAllEngines(new HashSet<>(Set.of(LayoutEngineProto.LayoutEngine.ALGORITHM)));
    }
    return documentBuilder.build();
}
/**
 * Maps a model {@link NodeType} to the proto enum constant with the same name.
 *
 * @param type the model node type
 * @return the proto node type carrying the identical constant name
 */
private static NodeTypeProto.NodeType resolveType(NodeType type) {

    String typeName = type.name();
    return NodeTypeProto.NodeType.valueOf(typeName);
}
/**
 * Collects the ids of all atomic text blocks of the given text block, in their existing order.
 *
 * @param textBlock the text block whose atomic blocks are read
 * @return the ids of the atomic text blocks
 */
private List<Long> toAtomicTextBlockIds(TextBlock textBlock) {

    var atomicBlocks = textBlock.getAtomicTextBlocks();
    return atomicBlocks.stream()
            .map(atomicBlock -> atomicBlock.getId())
            .toList();
}
/**
 * Copies rotation, height, width and number of a {@link Page} into a {@link DocumentPage}.
 *
 * @param p the page to serialize
 * @return the serialized page
 */
private DocumentPage toPageData(Page p) {

    var pageBuilder = DocumentPage.newBuilder();
    pageBuilder.setRotation(p.getRotation());
    pageBuilder.setHeight(p.getHeight());
    pageBuilder.setWidth(p.getWidth());
    pageBuilder.setNumber(p.getNumber());
    return pageBuilder.build();
}
/**
 * Serializes the textual part of an atomic text block: id, page number, search text, number on page,
 * text range boundaries, line breaks and the italic/bold text ranges.
 *
 * @param atomicTextBlock the block to serialize
 * @return the serialized text data
 */
private DocumentTextData toAtomicTextBlockData(AtomicTextBlock atomicTextBlock) {

    var textData = DocumentTextData.newBuilder();
    textData.setId(atomicTextBlock.getId());
    textData.setPage(atomicTextBlock.getPage().getNumber().longValue());
    textData.setSearchText(atomicTextBlock.getSearchText());
    textData.setNumberOnPage(atomicTextBlock.getNumberOnPage());
    textData.setStart(atomicTextBlock.getTextRange().start());
    textData.setEnd(atomicTextBlock.getTextRange().end());
    textData.addAllLineBreaks(atomicTextBlock.getLineBreaks());

    // Italic and bold ranges are both written as start/end pairs.
    textData.addAllItalicTextRanges(atomicTextBlock.getItalicTextRanges()
                                            .stream()
                                            .map(italic -> RangeProto.Range.newBuilder().setStart(italic.start()).setEnd(italic.end()).build())
                                            .toList());
    textData.addAllBoldTextRanges(atomicTextBlock.getBoldTextRanges()
                                          .stream()
                                          .map(bold -> RangeProto.Range.newBuilder().setStart(bold.start()).setEnd(bold.end()).build())
                                          .toList());
    return textData.build();
}
/**
 * Serializes the positional part of an atomic text block: its id, its position rectangles and the
 * mapping from string indices to position indices.
 *
 * @param atomicTextBlock the block to serialize
 * @return the serialized position data
 */
private DocumentPositionData toAtomicPositionBlockData(AtomicTextBlock atomicTextBlock) {

    var positionData = DocumentPositionData.newBuilder();
    positionData.setId(atomicTextBlock.getId());
    positionData.addAllPositions(toPositions(atomicTextBlock.getPositions()));
    positionData.addAllStringIdxToPositionIdx(atomicTextBlock.getStringIdxToPositionIdx());
    return positionData.build();
}
/**
 * Converts every rectangle into a {@link Position}, keeping the input order.
 *
 * @param rects the rectangles to convert
 * @return a new mutable list with one position per rectangle
 */
private static List<Position> toPositions(List<Rectangle2D> rects) {

    List<Position> converted = new ArrayList<>();
    rects.forEach(rectangle -> converted.add(toPosition(rectangle)));
    return converted;
}
/**
 * Writes a rectangle as four float values in the order min x, min y, width, height.
 *
 * @param rect the rectangle to convert
 * @return the position holding the four values
 */
private static Position toPosition(Rectangle2D rect) {

    var position = Position.newBuilder();
    position.addValue((float) rect.getMinX());
    position.addValue((float) rect.getMinY());
    position.addValue((float) rect.getWidth());
    position.addValue((float) rect.getHeight());
    return position.build();
}
}

View File

@ -1,17 +1,18 @@
package com.iqser.red.service.redaction.v1.server.service.document;
package com.iqser.red.service.redaction.v1.server.mapper;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.DocumentPage;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.AllDocumentPositionData;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto.AllDocumentTextData;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
import static com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.DocumentPage;
import static com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
import static com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
import static com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
@ -19,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
@ -26,6 +28,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
@ -41,7 +45,7 @@ public class DocumentGraphMapper {
DocumentTree documentTree = new DocumentTree(document);
Context context = new Context(documentData, documentTree);
context.pageData.addAll(documentData.getDocumentPages().getDocumentPagesList()
context.pages.addAll(documentData.getDocumentPages().getDocumentPagesList()
.stream()
.map(DocumentGraphMapper::buildPage)
.toList());
@ -49,7 +53,7 @@ public class DocumentGraphMapper {
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildrenList(), context));
document.setDocumentTree(context.documentTree);
document.setPages(new HashSet<>(context.pageData));
document.setPages(new HashSet<>(context.pages));
document.setNumberOfPages(documentData.getDocumentPages().getDocumentPagesCount());
document.setTextBlock(document.getTextBlock());
@ -70,20 +74,21 @@ public class DocumentGraphMapper {
SemanticNode node = switch (entryData.getType()) {
case SECTION -> buildSection(context);
case SUPER_SECTION -> buildSuperSection(context);
case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
case PARAGRAPH -> buildParagraph(context, entryData.getPropertiesMap());
case HEADLINE -> buildHeadline(context);
case HEADER -> buildHeader(context);
case FOOTER -> buildFooter(context);
case TABLE -> buildTable(context, entryData.getProperties());
case TABLE_CELL -> buildTableCell(context, entryData.getProperties());
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbersList());
case TABLE -> buildTable(context, entryData.getPropertiesMap());
case TABLE_CELL -> buildTableCell(context, entryData.getPropertiesMap());
case IMAGE -> buildImage(context, entryData.getPropertiesMap(), entryData.getPageNumbersList());
case TABLE_OF_CONTENTS -> buildTableOfContents(context);
case TABLE_OF_CONTENTS_ITEM -> buildTableOfContentsItem(context);
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
};
if (entryData.getAtomicBlockIdsCount() > 0) {
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIdsList(), context, node);
node.setLeafTextBlock(textBlock);
switch (entryData.getType()) {
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
@ -91,15 +96,30 @@ public class DocumentGraphMapper {
default -> textBlock.getAtomicTextBlocks()
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
}
}
List<Integer> treeId = entryData.getTreeIdList();
entryData.getEnginesList()
.forEach(node::addEngine);
node.setTreeId(treeId);
entryData.getEnginesList()
.stream()
.map(engine -> LayoutEngine.valueOf(engine.name()))
.forEach(node::addEngine);
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildrenList(), context)).node(node).build());
} return newEntries;
}
return newEntries;
}
/**
 * Creates an empty {@link TableOfContents} node that is bound to the document tree of the given context.
 *
 * @param context the mapping context providing the document tree
 * @return the new table of contents node
 */
private static TableOfContents buildTableOfContents(Context context) {

    return TableOfContents.builder()
            .documentTree(context.documentTree)
            .build();
}
/**
 * Creates an empty {@link TableOfContentsItem} node that is bound to the document tree of the given context.
 *
 * @param context the mapping context providing the document tree
 * @return the new table of contents item node
 */
private static TableOfContentsItem buildTableOfContentsItem(Context context) {

    return TableOfContentsItem.builder()
            .documentTree(context.documentTree)
            .build();
}
@ -184,10 +204,13 @@ public class DocumentGraphMapper {
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.getDocumentPositionData(Math.toIntExact(atomicTextBlockId)),
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextDataBlockData.getDocumentTextDataList()
.get(Math.toIntExact(atomicTextBlockId)),
context.atomicPositionBlockData.getDocumentPositionDataList()
.get(Math.toIntExact(atomicTextBlockId)),
parent,
context.getPage(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)).getPage()));
context.getPage(context.documentTextDataBlockData.getDocumentTextDataList()
.get(Math.toIntExact(atomicTextBlockId)).getPage()));
}
@ -200,28 +223,31 @@ public class DocumentGraphMapper {
static final class Context {
private final DocumentTree documentTree;
private final List<Page> pageData;
private final AllDocumentTextData documentTextData;
private final AllDocumentPositionData documentPositionData;
private final List<Page> pages;
private final AllDocumentTextData documentTextDataBlockData;
private final AllDocumentPositionData atomicPositionBlockData;
Context(DocumentData documentData, DocumentTree documentTree) {
this.documentTree = documentTree;
this.pageData = new ArrayList<>();
this.documentTextData = documentData.getDocumentTextData();
this.documentPositionData = documentData.getDocumentPositionData();
this.pages = new LinkedList<>();
this.documentTextDataBlockData = documentData.getDocumentTextData();
this.atomicPositionBlockData = documentData.getDocumentPositionData();
}
private Page getPage(Long pageIndex) {
public Page getPage(Long pageIndex) {
Page page = pageData.get(Math.toIntExact(pageIndex) - 1);
assert page.getNumber() == Math.toIntExact(pageIndex);
return page;
return pages.stream()
.filter(page -> page.getNumber() == Math.toIntExact(pageIndex))
.findFirst()
.orElseThrow(() -> new NoSuchElementException(String.format("ClassificationPage with number %d not found", pageIndex)));
}
}
}

View File

@ -0,0 +1,151 @@
package com.iqser.red.service.redaction.v1.server.mapper;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureWrapper;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.experimental.UtilityClass;
@UtilityClass
public class PropertiesMapper {
/**
 * Builds the property map of an image: image type name, transparency flag, position, id and
 * representation hash.
 *
 * @param image the image to describe
 * @return a new mutable map with the image properties
 */
public static Map<String, String> buildImageProperties(Image image) {

    String imageTypeName = image.getImageType().name();
    String transparentFlag = String.valueOf(image.isTransparent());
    String positionString = toString(image.getPosition());

    Map<String, String> imageProperties = new HashMap<>();
    imageProperties.put(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE, imageTypeName);
    imageProperties.put(DocumentStructureWrapper.ImageProperties.TRANSPARENT, transparentFlag);
    imageProperties.put(DocumentStructureWrapper.ImageProperties.POSITION, positionString);
    imageProperties.put(DocumentStructureWrapper.ImageProperties.ID, image.getId());
    imageProperties.put(DocumentStructureWrapper.ImageProperties.REPRESENTATION_HASH, image.getRepresentationHash());
    return imageProperties;
}
/**
 * Builds the property map of a table cell: row, column, header flag and the bounding box on its page.
 *
 * @param tableCell the table cell to describe
 * @return a new mutable map with the table cell properties
 * @throws IllegalArgumentException if the cell occurs on more than one page, has more than one bounding box,
 *                                  or is not assigned to any page
 */
public static Map<String, String> buildTableCellProperties(TableCell tableCell) {

    Map<String, String> properties = new HashMap<>();
    properties.put(DocumentStructureWrapper.TableCellProperties.ROW, String.valueOf(tableCell.getRow()));
    properties.put(DocumentStructureWrapper.TableCellProperties.COL, String.valueOf(tableCell.getCol()));
    properties.put(DocumentStructureWrapper.TableCellProperties.HEADER, String.valueOf(tableCell.isHeader()));

    if (tableCell.getPages().size() > 1 || tableCell.getBBox().size() > 1) {
        throw new IllegalArgumentException("TableCell can only occur on a single page!");
    }

    // A cell without any page used to fail with a message-less NoSuchElementException from Optional.get();
    // report it explicitly, in line with the multi-page check above.
    String bBoxString = toString(tableCell.getBBox()
                                         .get(tableCell.getPages()
                                                      .stream()
                                                      .findFirst()
                                                      .orElseThrow(() -> new IllegalArgumentException("TableCell must occur on a page, but has none!"))));
    properties.put(DocumentStructureWrapper.TableCellProperties.B_BOX, bBoxString);

    return properties;
}
/**
 * Builds the property map of a table: its number of rows and number of columns.
 *
 * @param table the table to describe
 * @return a new mutable map with the table properties
 */
public static Map<String, String> buildTableProperties(Table table) {

    String rowCount = String.valueOf(table.getNumberOfRows());
    String colCount = String.valueOf(table.getNumberOfCols());

    Map<String, String> tableProperties = new HashMap<>();
    tableProperties.put(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS, rowCount);
    tableProperties.put(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS, colCount);
    return tableProperties;
}
/**
 * Reads image type, transparency flag and position from the property map and sets them on the builder.
 *
 * @param properties the serialized image properties
 * @param builder    the image builder to populate
 */
public static void parseImageProperties(Map<String, String> properties, Image.ImageBuilder<?, ?> builder) {

    String rawImageType = properties.get(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE);
    builder.imageType(parseImageType(rawImageType));

    String rawTransparent = properties.get(DocumentStructureWrapper.ImageProperties.TRANSPARENT);
    builder.transparent(Boolean.parseBoolean(rawTransparent));

    String rawPosition = properties.get(DocumentStructureWrapper.ImageProperties.POSITION);
    builder.position(DocumentStructureWrapper.parseRectangle2D(rawPosition));
}
/**
 * Reads row, column, header flag and bounding box from the property map and sets them on the builder.
 *
 * @param properties the serialized table cell properties
 * @param builder    the table cell builder to populate
 */
public static void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder<?, ?> builder) {

    String rawRow = properties.get(DocumentStructureWrapper.TableCellProperties.ROW);
    builder.row(Integer.parseInt(rawRow));

    String rawCol = properties.get(DocumentStructureWrapper.TableCellProperties.COL);
    builder.col(Integer.parseInt(rawCol));

    String rawHeader = properties.get(DocumentStructureWrapper.TableCellProperties.HEADER);
    builder.header(Boolean.parseBoolean(rawHeader));

    String rawBBox = properties.get(DocumentStructureWrapper.TableCellProperties.B_BOX);
    builder.bBox(DocumentStructureWrapper.parseRectangle2D(rawBBox));
}
/**
 * Reads the number of rows and columns from the property map and sets them on the builder.
 *
 * @param properties the serialized table properties
 * @param builder    the table builder to populate
 */
public static void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {

    String rawRowCount = properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS);
    builder.numberOfRows(Integer.parseInt(rawRowCount));

    String rawColCount = properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS);
    builder.numberOfCols(Integer.parseInt(rawColCount));
}
/**
 * Builds the property map of a duplicated paragraph: the ids of the atomic text blocks of its unsorted
 * leaf text block, written in {@link Arrays#toString(Object[])} format.
 *
 * @param duplicatedParagraph the duplicated paragraph to describe
 * @return a new mutable map with the single unsorted-text-block-id property
 */
public static Map<String, String> buildDuplicateParagraphProperties(DuplicatedParagraph duplicatedParagraph) {

    Long[] unsortedIds = toAtomicTextBlockIds(duplicatedParagraph.getUnsortedLeafTextBlock());

    Map<String, String> duplicateProperties = new HashMap<>();
    duplicateProperties.put(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID, Arrays.toString(unsortedIds));
    return duplicateProperties;
}
/**
 * Tells whether the property map carries the unsorted-text-block-id key.
 *
 * @param properties the serialized paragraph properties
 * @return {@code true} if the key is present
 */
public static boolean isDuplicateParagraph(Map<String, String> properties) {

    String markerKey = DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID;
    return properties.containsKey(markerKey);
}
/**
 * Reads the unsorted text block ids of a duplicated paragraph from the property map.
 *
 * @param properties the serialized paragraph properties
 * @return the parsed ids
 */
public static List<Long> getUnsortedTextblockIds(Map<String, String> properties) {

    String serializedIds = properties.get(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
    return toLongList(serializedIds);
}
/**
 * Parses a list of ids written in {@link Arrays#toString(Object[])} format, e.g. {@code "[1, 2, 3]"}.
 * <p>
 * The first and last character (the brackets) are dropped and the remainder is split on commas. Each
 * token is trimmed before parsing, because {@code Arrays.toString} separates elements with {@code ", "}
 * and {@link Long#valueOf(String)} rejects surrounding whitespace. An empty list ({@code "[]"}) yields
 * an empty result.
 *
 * @param ids the bracketed, comma separated ids
 * @return an unmodifiable list of the parsed ids, in input order
 * @throws NumberFormatException if a token is not a valid long
 */
public static List<Long> toLongList(String ids) {

    String content = ids.substring(1, ids.length() - 1).trim();
    if (content.isEmpty()) {
        // "".split(",") would yield a single empty token, which cannot be parsed.
        return List.of();
    }
    return Arrays.stream(content.split(","))
            .map(String::trim)
            .map(Long::valueOf)
            .toList();
}
/**
 * Resolves an {@link ImageType} from its name, ignoring case; names without a matching constant
 * resolve to {@link ImageType#OTHER}.
 *
 * @param imageType the serialized image type name
 * @return the matching image type, or {@code OTHER} if there is none
 */
private static ImageType parseImageType(String imageType) {

    String normalizedName = imageType.toUpperCase(Locale.ROOT);
    try {
        return ImageType.valueOf(normalizedName);
    } catch (IllegalArgumentException ignored) {
        // No constant with that name: fall back instead of failing.
        return ImageType.OTHER;
    }
}
/**
 * Formats a rectangle as x, y, width and height, each written with {@code %f} in the US locale and
 * separated by {@code DocumentStructureWrapper.RECTANGLE_DELIMITER}.
 *
 * @param rectangle2D the rectangle to format
 * @return the delimited string representation
 */
public static String toString(Rectangle2D rectangle2D) {

    double x = rectangle2D.getX();
    double y = rectangle2D.getY();
    double width = rectangle2D.getWidth();
    double height = rectangle2D.getHeight();

    return String.format(Locale.US,
                         "%f%s%f%s%f%s%f",
                         x,
                         DocumentStructureWrapper.RECTANGLE_DELIMITER,
                         y,
                         DocumentStructureWrapper.RECTANGLE_DELIMITER,
                         width,
                         DocumentStructureWrapper.RECTANGLE_DELIMITER,
                         height);
}
/**
 * Collects the ids of all atomic text blocks of the given text block into an array, in their existing order.
 *
 * @param textBlock the text block whose atomic blocks are read
 * @return the ids of the atomic text blocks
 */
private static Long[] toAtomicTextBlockIds(TextBlock textBlock) {

    var atomicBlocks = textBlock.getAtomicTextBlocks();
    Long[] ids = new Long[atomicBlocks.size()];
    int nextIndex = 0;
    for (AtomicTextBlock atomicBlock : atomicBlocks) {
        ids[nextIndex++] = atomicBlock.getId();
    }
    return ids;
}
}

View File

@ -0,0 +1,116 @@
package com.iqser.red.service.redaction.v1.server.model.document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
/**
 * Base {@link NodeVisitor} that walks the whole node tree.
 * <p>
 * Every {@code visit} overload delegates to {@link #defaultVisit(SemanticNode)}, which first calls
 * {@link #visitNodeDefault(SemanticNode)} for the node and then lets each child accept this visitor.
 * Subclasses override individual {@code visit} overloads or {@code visitNodeDefault} to add behavior.
 */
public abstract class AbstractNodeVisitor implements NodeVisitor {

    @Override
    public void visit(Document document) {

        defaultVisit(document);
    }


    @Override
    public void visit(SuperSection superSection) {

        defaultVisit(superSection);
    }


    @Override
    public void visit(Section section) {

        defaultVisit(section);
    }


    @Override
    public void visit(Headline headline) {

        defaultVisit(headline);
    }


    @Override
    public void visit(Paragraph paragraph) {

        defaultVisit(paragraph);
    }


    @Override
    public void visit(Footer footer) {

        defaultVisit(footer);
    }


    @Override
    public void visit(Header header) {

        defaultVisit(header);
    }


    @Override
    public void visit(Image image) {

        defaultVisit(image);
    }


    @Override
    public void visit(Table table) {

        defaultVisit(table);
    }


    @Override
    public void visit(TableCell tableCell) {

        defaultVisit(tableCell);
    }


    @Override
    public void visit(TableOfContents tableOfContents) {

        defaultVisit(tableOfContents);
    }


    @Override
    public void visit(TableOfContentsItem tableOfContentsItem) {

        defaultVisit(tableOfContentsItem);
    }


    /**
     * Hook called for every visited node before its children are visited; does nothing by default.
     *
     * @param node the node currently being visited
     */
    public void visitNodeDefault(SemanticNode node) {
        // Intentionally empty: subclasses override this to handle all node types uniformly.
    }


    /**
     * Calls the {@link #visitNodeDefault(SemanticNode)} hook for the node and then visits its children.
     *
     * @param semanticNode the node to process
     */
    protected void defaultVisit(SemanticNode semanticNode) {

        visitNodeDefault(semanticNode);
        semanticNode.streamChildren()
                .forEach(child -> child.accept(this));
    }

}

View File

@ -296,6 +296,22 @@ public class DocumentTree {
}
/**
 * Looks up an entry by its tree id, i.e. the sequence of child indices leading from the root to the entry.
 * An empty tree id resolves to the root.
 *
 * @param treeId the child indices to follow, starting at the root
 * @return the entry at that path, or an empty {@link Optional} if any index is out of bounds
 */
public Optional<Entry> findEntryById(List<Integer> treeId) {

    Entry current = root;
    // With an empty tree id the loop body never runs and the root is returned.
    for (int childIndex : treeId) {
        boolean withinBounds = childIndex >= 0 && childIndex < current.children.size();
        if (!withinBounds) {
            return Optional.empty();
        }
        current = current.children.get(childIndex);
    }
    return Optional.of(current);
}
public Stream<Entry> mainEntries() {
return root.children.stream();

View File

@ -0,0 +1,53 @@
package com.iqser.red.service.redaction.v1.server.model.document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
/**
 * Visitor over the node types of a document.
 * <p>
 * Declares one {@code visit} overload per node type: document, super section, section, headline,
 * paragraph, footer, header, image, table, table cell, table of contents and table of contents item.
 */
public interface NodeVisitor {
void visit(Document document);
void visit(SuperSection superSection);
void visit(Section section);
void visit(Headline headline);
void visit(Paragraph paragraph);
void visit(Footer footer);
void visit(Header header);
void visit(Image image);
void visit(Table table);
void visit(TableCell tableCell);
void visit(TableOfContents tableOfContents);
void visit(TableOfContentsItem tableOfContentsItem);
}

View File

@ -134,6 +134,12 @@ public class TextRange implements Comparable<TextRange> {
}
/**
 * Checks whether the index lies within this range, with the start included and the end excluded.
 *
 * @param index the index to test
 * @return {@code true} if {@code start <= index < end}
 */
public boolean containsExclusive(int index) {

    boolean beforeStart = index < start;
    return !beforeStart && index < end;
}
/**
* Checks if this {@link TextRange} intersects with another {@link TextRange}.
*

View File

@ -6,7 +6,6 @@ import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
import lombok.NonNull;
@ -95,6 +94,7 @@ public interface IEntity {
*/
// Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
default boolean applied() {
if (this.getMatchedRule().isHigherPriorityThanManual()) {
return getMatchedRule().isApplied();
}
@ -120,6 +120,7 @@ public interface IEntity {
* @return True if ignored, false otherwise.
*/
default boolean ignored() {
if (this.getMatchedRule().isHigherPriorityThanManual()) {
return getMatchedRule().isIgnored();
}
@ -135,6 +136,7 @@ public interface IEntity {
* @return True if removed, false otherwise.
*/
default boolean removed() {
if (this.getMatchedRule().isHigherPriorityThanManual()) {
return getMatchedRule().isRemoved();
}
@ -149,6 +151,7 @@ public interface IEntity {
* @return True if resized, false otherwise.
*/
default boolean resized() {
if (this.getMatchedRule().isHigherPriorityThanManual()) {
return getMatchedRule().isRemoved();
}
@ -384,16 +387,10 @@ public interface IEntity {
*
* @return The built reason string.
*/
default String buildReasonWithManualChangeDescriptions() {
default String buildReason() {
if (getManualOverwrite().getDescriptions().isEmpty()) {
return getMatchedRule().getReason();
}
if (getMatchedRule().getReason().isEmpty()) {
return String.join(", ", getManualOverwrite().getDescriptions());
}
return getMatchedRule().getReason() + ", " + String.join(", ", getManualOverwrite().getDescriptions());
}
/**

View File

@ -1,10 +1,8 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
@ -14,7 +12,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -26,18 +23,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class ManualChangeOverwrite {
private static final Map<Class<? extends BaseAnnotation>, String> MANUAL_CHANGE_DESCRIPTIONS = Map.of(//
ManualRedactionEntry.class, "created by manual change", //
ManualLegalBasisChange.class, "legal basis was manually changed", //
ManualResizeRedaction.class, "resized by manual override", //
ManualForceRedaction.class, "forced by manual override", //
IdRemoval.class, "removed by manual override", //
ManualRecategorization.class, "recategorized by manual override");
@Builder.Default
List<BaseAnnotation> manualChanges = new LinkedList<>();
boolean changed;
List<String> descriptions;
String type;
String legalBasis;
String section;
@ -63,6 +51,7 @@ public class ManualChangeOverwrite {
this.manualChanges = new LinkedList<>();
}
public ManualChangeOverwrite(EntityType entityType, String section) {
this(entityType);
@ -95,8 +84,6 @@ public class ManualChangeOverwrite {
private void updateFields(List<BaseAnnotation> sortedManualChanges) {
descriptions = new LinkedList<>();
for (BaseAnnotation manualChange : sortedManualChanges) {
// ManualRedactionEntries are created prior to rule execution in analysis service.
@ -151,8 +138,6 @@ public class ManualChangeOverwrite {
legalBasis = recategorization.getLegalBasis();
}
}
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
}
changed = false;
}
@ -245,13 +230,6 @@ public class ManualChangeOverwrite {
}
public List<String> getDescriptions() {
calculateCurrentOverride();
return descriptions == null ? Collections.emptyList() : descriptions;
}
public Optional<List<RectangleWithPage>> getPositions() {
calculateCurrentOverride();

View File

@ -5,9 +5,6 @@ import java.util.List;
import java.util.Objects;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleType;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
@ -57,11 +54,14 @@ public final class MatchedRule implements Comparable<MatchedRule> {
return MatchedRule.builder().ruleIdentifier(RuleIdentifier.empty()).build();
}
public boolean isHigherPriorityThanManual() {
return (-1 < RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type())) &&
(RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type()) < RULE_TYPE_PRIORITIES.indexOf(MANUAL_TYPE));
return (-1 < RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type())) && (RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type()) < RULE_TYPE_PRIORITIES.indexOf(
MANUAL_TYPE));
}
/**
* Returns a modified instance of {@link MatchedRule} based on its applied status.
* If the rule has been applied, it returns a new {@link MatchedRule} instance that retains all properties of the original

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model;
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model.drools;
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.Objects;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model.drools;
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.regex.Pattern;

View File

@ -9,7 +9,6 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -9,8 +9,8 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
@ -169,4 +169,11 @@ public class Document extends AbstractSemanticNode {
return bBox;
}
/**
 * Dispatches to the visitor's {@code visit} overload for {@link Document}.
 *
 * @param visitor the visitor to call back
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
@ -57,4 +58,11 @@ public class Footer extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.FOOTER + ": " + leafTextBlock.buildSummary();
}
/**
 * Dispatches to the visitor's {@code visit} overload for {@link Footer}.
 *
 * @param visitor the visitor to call back
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
@ -60,4 +61,11 @@ public class Header extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.HEADER + ": " + leafTextBlock.buildSummary();
}
/**
 * Hands this header node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -99,4 +100,11 @@ public class Headline extends AbstractSemanticNode {
.isPresent();
}
/**
 * Hands this headline node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -8,6 +8,7 @@ import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
@ -24,8 +25,7 @@ import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
*
Represents an image within the document.
* Represents an image within the document.
*/
@Data
@SuperBuilder
@ -37,6 +37,7 @@ public class Image extends AbstractSemanticNode implements IEntity {
String id;
String representationHash;
TextBlock leafTextBlock;
ImageType imageType;
@ -73,6 +74,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
}
/**
 * Reports whether this image is considered to cover the full page.
 * <p>
 * An image qualifies when its type is {@link ImageType#OCR} or when its own area is at least
 * half of the page's area.
 *
 * @return {@code true} if the image is of type OCR or spans at least 50% of the page area
 */
public boolean isFullPageImage() {

    // Identity comparison is the idiomatic enum check and, unlike equals(), does not throw
    // when no image type has been set; in that case only the area rule decides.
    return imageType == ImageType.OCR || getArea() >= 0.5 * page.getArea();
}
/**
 * Computes the area of this image's position rectangle as width multiplied by height.
 *
 * @return the product of the position's width and height
 */
private double getArea() {

    final var width = position.getWidth();
    final var height = position.getHeight();
    return width * height;
}
@Override
public TextRange getTextRange() {
@ -90,14 +103,15 @@ public class Image extends AbstractSemanticNode implements IEntity {
@Override
public String type() {
return getManualOverwrite().getType().orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
return getManualOverwrite().getType()
.orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
}
@Override
public String toString() {
return getTreeId() + ": " + getValue() + " " + position;
return getTreeId() + ": " + getValue() + " [%.2f,%.2f,%.2f,%.2f]".formatted(position.getX(), position.getY(), position.getWidth(), position.getHeight());
}
@ -148,4 +162,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
return (area / calculatedIntersection) > containmentThreshold;
}
/**
 * Hands this image node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
/**
 * Marks images as leaf nodes.
 *
 * @return always {@code true}
 */
@Override
public boolean isLeaf() {
return true;
}
}

View File

@ -0,0 +1,7 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
/**
 * Layout engines known to the document model.
 * <p>
 * The constant names mirror those of the generated {@code LayoutEngineProto.LayoutEngine} enum;
 * {@code LayoutEngineMappingTest} resolves each constant by name in both directions, so a constant
 * added or renamed here must be added or renamed in {@code LayoutEngine.proto} as well.
 */
public enum LayoutEngine {
ALGORITHM,
AI,
OUTLINE
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
@ -49,4 +50,11 @@ public class Paragraph extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.PARAGRAPH + ": " + leafTextBlock.buildSummary();
}
/**
 * Hands this paragraph node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -89,4 +91,11 @@ public class Section extends AbstractSemanticNode {
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
}
/**
 * Hands this section node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -20,7 +20,7 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class SectionIdentifier {
public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d{1,2})(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?");
public static Pattern alphanumericIdentifierPattern = Pattern.compile("^[\\s]?[A-Za-z][\\s.,;]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?");
protected enum Format {

View File

@ -14,18 +14,17 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.service.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.LayoutEngine;
public interface SemanticNode {
@ -819,16 +818,12 @@ public interface SemanticNode {
/**
* Accepts a {@link NodeVisitor} and initiates a depth-first traversal of the semantic tree rooted at this node.
* The visitor's {@link NodeVisitor#visit(SemanticNode)} method is invoked for each node encountered during the traversal.
* The visitor's {@link NodeVisitor#visit} method is invoked for each node encountered during the traversal.
*
* @param visitor The {@link NodeVisitor} to accept and apply during the traversal.
* @see NodeVisitor
*/
default void accept(NodeVisitor visitor) {
visitor.visit(this);
streamChildren().forEach(childNode -> childNode.accept(visitor));
}
void accept(NodeVisitor visitor);
/**

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -89,4 +91,11 @@ public class SuperSection extends AbstractSemanticNode {
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
}
/**
 * Hands this super section node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -14,10 +14,10 @@ import java.util.stream.IntStream;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -419,4 +419,11 @@ public class Table implements SemanticNode {
return treeId.toString() + ": " + NodeType.TABLE + ": #cols: " + numberOfCols + ", #rows: " + numberOfRows + ", " + this.getTextBlock().buildSummary();
}
/**
 * Hands this table node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -4,6 +4,7 @@ import java.awt.geom.Rectangle2D;
import java.util.HashMap;
import java.util.Map;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
@ -80,4 +81,11 @@ public class TableCell extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.TABLE_CELL + ": " + this.getTextBlock().buildSummary();
}
/**
 * Hands this table cell node to the given visitor by calling {@code visitor.visit(this)}.
 * This method does not iterate over child nodes itself.
 *
 * @param visitor the visitor to invoke for this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -0,0 +1,47 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Semantic node of type {@link NodeType#TABLE_OF_CONTENTS}.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
public class TableOfContents extends AbstractSemanticNode {

    /**
     * @return always {@link NodeType#TABLE_OF_CONTENTS}
     */
    @Override
    public NodeType getType() {

        return NodeType.TABLE_OF_CONTENTS;
    }


    /**
     * Resolves the headline belonging to this table of contents.
     * <p>
     * Takes the first node yielded by {@code streamChildrenOfType(NodeType.HEADLINE)}; if there is none,
     * the headline of the parent node is returned instead.
     *
     * @return the first child headline, or the parent's headline as a fallback
     */
    public Headline getHeadline() {

        return streamChildrenOfType(NodeType.HEADLINE).map(node -> (Headline) node)
                .findFirst()
                .orElseGet(() -> getParent().getHeadline());
    }


    /**
     * Hands this node to the given visitor by calling {@code visitor.visit(this)}.
     * This method does not iterate over child nodes itself.
     *
     * @param visitor the visitor to invoke for this node
     */
    @Override
    public void accept(NodeVisitor visitor) {

        visitor.visit(this);
    }


    /**
     * Builds a one-line description made of the tree id, the node type and a summary of the text block.
     *
     * @return {@code "<treeId>: TABLE_OF_CONTENTS: <text summary>"}
     */
    @Override
    public String toString() {

        // Label with this node's own type; the item type here would make both node kinds indistinguishable in logs.
        return getTreeId() + ": " + NodeType.TABLE_OF_CONTENTS + ": " + getTextBlock().buildSummary();
    }

}

View File

@ -0,0 +1,57 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Leaf semantic node of type {@link NodeType#TABLE_OF_CONTENTS_ITEM} that carries its own text block.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
public class TableOfContentsItem extends AbstractSemanticNode {

    TextBlock leafTextBlock;


    /**
     * @return the text block stored directly on this item
     */
    @Override
    public TextBlock getTextBlock() {

        return leafTextBlock;
    }


    /**
     * @return always {@link NodeType#TABLE_OF_CONTENTS_ITEM}
     */
    @Override
    public NodeType getType() {

        return NodeType.TABLE_OF_CONTENTS_ITEM;
    }


    /**
     * @return always {@code true}
     */
    @Override
    public boolean isLeaf() {

        return true;
    }


    /**
     * Hands this node to the given visitor by calling {@code visitor.visit(this)}.
     *
     * @param visitor the visitor to invoke for this node
     */
    @Override
    public void accept(NodeVisitor visitor) {

        visitor.visit(this);
    }


    /**
     * @return the tree id, the node type and a summary of the leaf text block, separated by {@code ": "}
     */
    @Override
    public String toString() {

        return "%s: %s: %s".formatted(getTreeId(), NodeType.TABLE_OF_CONTENTS_ITEM, leafTextBlock.buildSummary());
    }

}

View File

@ -1,6 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.DocumentPositionData;
import static com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
@ -16,12 +16,12 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData.Position;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.DocumentTextData;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.DocumentPositionData.Position;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto.DocumentTextData;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -29,6 +29,7 @@ import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -37,21 +38,33 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class AtomicTextBlock implements TextBlock {
@NonNull
Long id;
@NonNull
Integer numberOnPage;
@NonNull
Page page;
//string coordinates
@NonNull
TextRange textRange;
@NonNull
String searchText;
@NonNull
List<Integer> lineBreaks;
@NonNull
List<TextRange> italicTextRanges;
@NonNull
List<TextRange> boldTextRanges;
SoftReference<String> searchTextLowerCaseCache;
SoftReference<List<String>> wordsCache;
//position coordinates
@NonNull
List<Integer> stringIdxToPositionIdx;
@Getter
@NonNull
List<Rectangle2D> positions;
@EqualsAndHashCode.Exclude
@ -77,6 +90,8 @@ public class AtomicTextBlock implements TextBlock {
.stringIdxToPositionIdx(Collections.emptyList())
.positions(Collections.emptyList())
.parent(parent)
.boldTextRanges(Collections.emptyList())
.italicTextRanges(Collections.emptyList())
.build();
}
@ -92,6 +107,14 @@ public class AtomicTextBlock implements TextBlock {
.lineBreaks(atomicTextBlockData.getLineBreaksList())
.stringIdxToPositionIdx(atomicPositionBlockData.getStringIdxToPositionIdxList())
.positions(toRectangle2DList(atomicPositionBlockData.getPositionsList()))
.italicTextRanges(atomicTextBlockData.getItalicTextRangesList()
.stream()
.map(r -> new TextRange(r.getStart(), r.getEnd()))
.toList())
.boldTextRanges(atomicTextBlockData.getBoldTextRangesList()
.stream()
.map(r -> new TextRange(r.getStart(), r.getEnd()))
.toList())
.parent(parent)
.build();
}

View File

@ -18,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import lombok.AccessLevel;
import lombok.Data;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -162,6 +163,26 @@ public class ConcatenatedTextBlock implements TextBlock {
}
/**
 * Collects the italic text ranges of all atomic text blocks of this block into a single list,
 * in the order in which the atomic blocks are returned by {@code getAtomicTextBlocks()}.
 *
 * @return the italic ranges of every contained atomic text block, flattened into one list
 */
@Override
public List<TextRange> getItalicTextRanges() {

    return getAtomicTextBlocks().stream()
            .map(TextBlock::getItalicTextRanges)
            .flatMap(List::stream)
            .toList();
}
/**
 * Collects the bold text ranges of all atomic text blocks of this block into a single list,
 * in the order in which the atomic blocks are returned by {@code getAtomicTextBlocks()}.
 *
 * @return the bold ranges of every contained atomic text block, flattened into one list
 */
@Override
public List<TextRange> getBoldTextRanges() {

    return getAtomicTextBlocks().stream()
            .map(TextBlock::getBoldTextRanges)
            .flatMap(List::stream)
            .toList();
}
@Override
public Rectangle2D getPosition(int stringIdx) {
@ -279,6 +300,7 @@ public class ConcatenatedTextBlock implements TextBlock {
}
@NonNull
@Override
public String toString() {

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model.document;
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import java.util.LinkedList;
import java.util.List;
@ -10,9 +10,6 @@ import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.NoArgsConstructor;
@NoArgsConstructor

View File

@ -55,6 +55,12 @@ public interface TextBlock extends CharSequence {
String subSequenceWithLineBreaks(TextRange textRange);
List<TextRange> getItalicTextRanges();
List<TextRange> getBoldTextRanges();
int numberOfLines();

View File

@ -313,7 +313,7 @@ public class RedactionSearchUtility {
while (matcher.find()) {
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
}
}catch (StackOverflowError stackOverflowError){
} catch (StackOverflowError stackOverflowError) {
log.warn("Stackoverflow error for pattern {} in text: {}", pattern.pattern(), textBlock);
}
return boundaries;
@ -322,7 +322,6 @@ public class RedactionSearchUtility {
private static List<TextRange> getTextRangesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
List<TextRange> boundaries = new LinkedList<>();
@ -330,7 +329,7 @@ public class RedactionSearchUtility {
while (matcher.find()) {
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
}
}catch (StackOverflowError stackOverflowError){
} catch (StackOverflowError stackOverflowError) {
log.warn("Stackoverflow error for pattern {} in text with linebreaks: {}", pattern.pattern(), searchTextWithLineBreaks);
}
return boundaries;

View File

@ -0,0 +1,25 @@
syntax = "proto3";
option java_outer_classname = "DocumentPageProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Wrapper message holding a list of DocumentPage entries.
message AllDocumentPages {
repeated DocumentPage documentPages = 1;
}
// Describes a single page by its number, its dimensions and its rotation.
message DocumentPage {
// The page number, starting with 1.
int32 number = 1;
// The page height in PDF user units.
int32 height = 2;
// The page width in PDF user units.
int32 width = 3;
// The page rotation as specified by the PDF.
int32 rotation = 4;
}

View File

@ -0,0 +1,28 @@
syntax = "proto3";
option java_outer_classname = "DocumentPositionDataProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Wrapper message holding a list of DocumentPositionData entries.
message AllDocumentPositionData {
repeated DocumentPositionData documentPositionData = 1;
}
// Glyph position data of one text block.
// NOTE(review): the id presumably matches DocumentTextData.id of the same text block — confirm against the mapper.
message DocumentPositionData {
// Identifier of the text block.
int64 id = 1;
// For each string coordinate in the search text of the text block, the array contains an entry relating the string coordinate to the position coordinate.
// This is required due to the text and position coordinates not being equal.
repeated int32 stringIdxToPositionIdx = 2;
// The bounding box for each glyph as a rectangle. This matrix is of size (n,4), where n is the number of glyphs in the text block.
// The second dimension specifies the rectangle with the value x, y, width, height, with x, y specifying the lower left corner.
// In order to access this information, the stringIdxToPositionIdx array must be used to transform the coordinates.
repeated Position positions = 3;
// Definition of a BoundingBox that contains x, y, width, and height.
message Position {
// The four rectangle values in the order x, y, width, height (see the description of positions above).
repeated float value = 1;
}
}

View File

@ -0,0 +1,12 @@
syntax = "proto3";
option java_outer_classname = "DocumentStructureProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
import "EntryData.proto";
// The structure of a document as a tree of EntryData nodes, reachable from a single root entry.
message DocumentStructure {
// The root EntryData represents the Document.
EntryData root = 1;
}

View File

@ -0,0 +1,40 @@
syntax = "proto3";
import "Range.proto";
option java_outer_classname = "DocumentTextDataProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Wrapper message holding a list of DocumentTextData entries.
message AllDocumentTextData {
repeated DocumentTextData documentTextData = 1;
}
// Text data of one text block: its text, where it sits in the document and which parts are styled.
message DocumentTextData {
// Identifier of the text block.
int64 id = 1;
// The page the text block occurs on.
// NOTE(review): presumably the 1-based page number used by DocumentPage.number — confirm.
int64 page = 2;
// The text of the text block.
string searchText = 3;
// Each text block is assigned a number on a page, starting from 0.
int32 numberOnPage = 4;
// The text blocks are ordered, this number represents the start of the text block as a string offset.
int32 start = 5;
// The text blocks are ordered, this number represents the end of the text block as a string offset.
int32 end = 6;
// The line breaks in the text of this semantic node in string offsets. They are exclusive end. At the end of each semantic node there is an implicit linebreak.
repeated int32 lineBreaks = 7;
// The text ranges where the text is italic
repeated Range italicTextRanges = 8;
// The text ranges where the text is bold
repeated Range boldTextRanges = 9;
}

View File

@ -0,0 +1,30 @@
syntax = "proto3";
import "LayoutEngine.proto";
import "NodeType.proto";
option java_outer_classname = "EntryDataProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// One node of the document tree. Nodes nest recursively through the children field.
message EntryData {
// Type of the semantic node.
NodeType type = 1;
// Specifies the position in the parsed tree structure.
repeated int32 treeId = 2;
// Specifies the text block IDs associated with this semantic node.
repeated int64 atomicBlockIds = 3;
// Specifies the pages this semantic node appears on.
repeated int64 pageNumbers = 4;
// Some semantic nodes have additional information, this information is stored in this Map.
map<string, string> properties = 5;
// All child Entries of this Entry.
repeated EntryData children = 6;
// Describes the origin of the semantic node.
repeated LayoutEngine engines = 7;
}

View File

@ -0,0 +1,10 @@
syntax = "proto3";
option java_outer_classname = "LayoutEngineProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Layout engines a semantic node can originate from (see EntryData.engines).
// The value names must stay identical to the constants of the Java enum
// com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
// LayoutEngineMappingTest resolves every value by name in both directions.
enum LayoutEngine {
ALGORITHM = 0;
AI = 1;
OUTLINE = 2;
}

View File

@ -0,0 +1,19 @@
syntax = "proto3";
option java_outer_classname = "NodeTypeProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Types of semantic nodes (see EntryData.type).
// The value names must stay identical to the constants of the Java enum
// com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
// NodeTypeMappingTest resolves every value by name in both directions.
enum NodeType {
DOCUMENT = 0;
SECTION = 1;
SUPER_SECTION = 2;
HEADLINE = 3;
PARAGRAPH = 4;
TABLE = 5;
TABLE_CELL = 6;
IMAGE = 7;
HEADER = 8;
FOOTER = 9;
TABLE_OF_CONTENTS = 10;
TABLE_OF_CONTENTS_ITEM = 11;
}

View File

@ -0,0 +1,14 @@
syntax = "proto3";
option java_outer_classname = "RangeProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// A pair of indices delimiting a range; used for the italic and bold text ranges of DocumentTextData.
// NOTE(review): whether end is inclusive or exclusive is not stated here — confirm against the Java TextRange it is mapped to.
message Range {
// A start index.
int32 start = 1;
// An end index.
int32 end = 2;
}

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Generates the Java classes for all .proto files in the current directory.
# The paths below are relative, so the script has to be run from the directory containing the .proto files.

# Minimum required protoc version
MIN_VERSION="28.3"

# Returns success (exit status 0) when version $1 is strictly older than version $2.
# `sort -V` orders the two versions ascending; $1 is older exactly when it differs from $2
# and is the first line of the sorted output.
version_lt() {
    [ "$1" != "$2" ] && [ "$(printf '%s\n' "$1" "$2" | sort -V | head -n1)" = "$1" ]
}

# Check that protoc is installed before asking it for its version
if ! command -v protoc &> /dev/null; then
    echo "Error: protoc is not installed. Please install version $MIN_VERSION or later."
    exit 1
fi

# Get the installed protoc version (second field of the `protoc --version` output)
INSTALLED_VERSION=$(protoc --version | awk '{print $2}')

# Reject installations older than the minimum version
if version_lt "$INSTALLED_VERSION" "$MIN_VERSION"; then
    echo "Error: protoc version $INSTALLED_VERSION is too old. Please upgrade to version $MIN_VERSION or later."
    exit 1
fi

# Generate Java files from proto files
protoc --java_out=../java ./*.proto

View File

@ -0,0 +1,33 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto;
/**
 * Guards the name-based mapping between the model enum {@link LayoutEngine} and the generated
 * {@link LayoutEngineProto.LayoutEngine}: every constant has to exist under the same name on both sides.
 */
public class LayoutEngineMappingTest {

    /**
     * Every model constant must be resolvable by name in the proto enum;
     * {@code valueOf} throws if a name is missing.
     */
    @Test
    public void assertAllValuesMatch() {

        for (LayoutEngine modelEngine : LayoutEngine.values()) {
            String name = modelEngine.name();
            LayoutEngineProto.LayoutEngine protoEngine = LayoutEngineProto.LayoutEngine.valueOf(name);
            assertEquals(protoEngine.name(), name);
        }
    }


    /**
     * Every proto constant except {@code UNRECOGNIZED}, which the model enum does not define,
     * must be resolvable by name in the model enum.
     */
    @Test
    public void assertAllValuesMatchReverse() {

        for (LayoutEngineProto.LayoutEngine protoEngine : LayoutEngineProto.LayoutEngine.values()) {
            if (protoEngine != LayoutEngineProto.LayoutEngine.UNRECOGNIZED) {
                String name = protoEngine.name();
                LayoutEngine modelEngine = LayoutEngine.valueOf(name);
                assertEquals(modelEngine.name(), name);
            }
        }
    }

}

View File

@ -0,0 +1,33 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.data.NodeTypeProto;
/**
 * Guards the name-based mapping between the model enum {@link NodeType} and the generated
 * {@link NodeTypeProto.NodeType}: every constant has to exist under the same name on both sides.
 */
public class NodeTypeMappingTest {

    /**
     * Every model constant must be resolvable by name in the proto enum;
     * {@code valueOf} throws if a name is missing.
     */
    @Test
    public void assertAllValuesMatch() {

        for (NodeType modelType : NodeType.values()) {
            String name = modelType.name();
            NodeTypeProto.NodeType protoType = NodeTypeProto.NodeType.valueOf(name);
            assertEquals(protoType.name(), name);
        }
    }


    /**
     * Every proto constant except {@code UNRECOGNIZED}, which this test skips,
     * must be resolvable by name in the model enum.
     */
    @Test
    public void assertAllValuesMatchReverse() {

        for (NodeTypeProto.NodeType protoType : NodeTypeProto.NodeType.values()) {
            if (protoType != NodeTypeProto.NodeType.UNRECOGNIZED) {
                String name = protoType.name();
                NodeType modelType = NodeType.valueOf(name);
                assertEquals(modelType.name(), name);
            }
        }
    }

}

View File

@ -0,0 +1,144 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.List;
import org.junit.jupiter.api.Test;
/**
 * Tests for {@link SectionIdentifier}: parsing identifiers from headline text via
 * {@code fromSearchText}, deriving child identifiers via {@code asChildOf}, and the
 * {@code isParentOf}/{@code isChildOf} relations between identifiers.
 */
class SectionIdentifierTest {
// A three-level numerical identifier is parsed, and parent/child relations hold for derived and shorter identifiers.
@Test
void testSectionIdentifier() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("1.1.2: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(3, identifier.level());
assertEquals(List.of(1, 1, 2), identifier.getIdentifiers());
SectionIdentifier child = SectionIdentifier.asChildOf(identifier);
assertTrue(child.isChildOf(identifier));
SectionIdentifier parent = SectionIdentifier.fromSearchText("1.1: Headline");
assertTrue(parent.isParentOf(identifier));
}
// A leading letter yields the ALPHANUMERIC format; "A" is expected as identifier 1.
@Test
void testSectionIdentifier2() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("A.1.2: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(3, identifier.level());
assertEquals(List.of(1, 1, 2), identifier.getIdentifiers());
}
// "D" is expected as identifier 4.
@Test
void testSectionIdentifier3() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("D.1.2: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(3, identifier.level());
assertEquals(List.of(4, 1, 2), identifier.getIdentifiers());
}
// Four numerical levels are parsed completely.
@Test
void testSectionIdentifier4() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("4.1.2.4: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// With five components in the text, only the first four are expected in the identifier.
@Test
void testSectionIdentifier5() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("D.1.2.4.5: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// A lower-case leading letter is expected to give the same result as the upper-case one.
@Test
void testSectionIdentifier6() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("d.1.2.4.5: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// With five numerical components in the text, only the first four are expected in the identifier.
@Test
void testSectionIdentifier7() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("4.1.2.4.5: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// "111" must not be split into three levels: a single numerical level is expected.
@Test
void testFalsePositive111() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("111: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(1, identifier.level());
}
// Checks isParentOf/isChildOf across numerical, alphanumerical and identifier-less headlines.
@Test
public void testParentOf() {
var headline = SectionIdentifier.fromSearchText("1 Did you ever hear the tragedy of Darth Plagueis The Wise?");
var headline1 = SectionIdentifier.fromSearchText("1.0 I thought not. Its not a story the Jedi would tell you.");
var headline2 = SectionIdentifier.fromSearchText("1.1 Its a Sith legend. Darth Plagueis was a Dark Lord of the Sith, ");
var headline3 = SectionIdentifier.fromSearchText("1.2.3 so powerful and so wise he could use the Force to influence the midichlorians to create life…");
var headline4 = SectionIdentifier.fromSearchText("1.2.3.4 He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.");
var headline5 = SectionIdentifier.fromSearchText("1.2.3.4.5 The dark side of the Force is a pathway to many abilities some consider to be unnatural.");
var headline6 = SectionIdentifier.fromSearchText("2.0 He became so powerful…");
var headline7 = SectionIdentifier.fromSearchText("10000.0 the only thing he was afraid of was losing his power,");
var headline8 = SectionIdentifier.fromSearchText("A.0 which eventually, of course, he did.");
var headline9 = SectionIdentifier.fromSearchText("Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.");
var headline10 = SectionIdentifier.fromSearchText("2.1.2 Ironic.");
var headline11 = SectionIdentifier.fromSearchText("2.He could save others from death,");
var headline12 = SectionIdentifier.fromSearchText(" 2. but not himself.");
// An identifier derived with asChildOf is a child of its source, and never its parent.
var paragraph1 = SectionIdentifier.asChildOf(headline);
assertTrue(paragraph1.isChildOf(headline));
assertTrue(headline.isParentOf(paragraph1));
assertFalse(paragraph1.isParentOf(headline));
// "1" is expected to be a parent of every deeper "1.x..." headline, but not of "1.0".
assertFalse(headline.isParentOf(headline1));
assertTrue(headline.isParentOf(headline2));
assertTrue(headline.isParentOf(headline3));
assertTrue(headline.isParentOf(headline4));
assertTrue(headline.isParentOf(headline5));
// "1.0" is expected to act as a parent of the deeper "1.x..." headlines, but not of itself.
assertTrue(headline1.isParentOf(headline2));
assertFalse(headline1.isParentOf(headline1));
assertTrue(headline3.isParentOf(headline4));
assertFalse(headline4.isParentOf(headline5));
assertFalse(headline2.isParentOf(headline3));
assertFalse(headline2.isParentOf(headline4));
assertTrue(headline1.isParentOf(headline3));
assertTrue(headline1.isParentOf(headline4));
// Headlines with a different first number are never children of "1.0".
assertFalse(headline1.isParentOf(headline6));
assertFalse(headline1.isParentOf(headline7));
// The alphanumerical "A.0" headline is not a parent of any numerical headline.
assertFalse(headline8.isParentOf(headline1));
assertFalse(headline8.isParentOf(headline2));
assertFalse(headline8.isParentOf(headline3));
assertFalse(headline8.isParentOf(headline4));
// A headline without an identifier is not its own parent.
assertFalse(headline9.isParentOf(headline9));
// "2." directly followed by text, or preceded by a space, is still expected to parse as section 2.
assertTrue(headline10.isChildOf(headline11));
assertTrue(headline10.isChildOf(headline12));
}
}

View File

@ -31,15 +31,10 @@ configurations {
}
}
configurations.all {
resolutionStrategy {
force("com.google.protobuf:protobuf-java:4.27.1")
}
}
dependencies {
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
implementation(project(":document"))
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") }
implementation("com.iqser.red.service:persistence-service-shared-mongo-v1:${persistenceServiceVersion}")
{
@ -69,7 +64,6 @@ dependencies {
implementation("org.drools:drools-engine:${droolsVersion}")
implementation("org.drools:drools-mvel:${droolsVersion}")
implementation("org.kie:kie-spring:7.74.1.Final")
implementation("com.google.protobuf:protobuf-java:4.27.1")
implementation("org.locationtech.jts:jts-core:1.19.0")
@ -186,13 +180,19 @@ tasks.register("generateJavaDoc", Javadoc::class) {
dependsOn("compileJava")
dependsOn("delombok")
classpath = project.sourceSets["main"].runtimeClasspath
source = fileTree("${buildDir}/generated/sources/delombok/java/main") {
val documentFiles = fileTree("${project(":document").layout.buildDirectory.get()}/generated/sources/delombok/java/main") {
include(droolsImports)
}
destinationDir = file(project.findProperty("javadocDestinationDir")?.toString() ?: "")
val mainFiles = fileTree("${layout.buildDirectory.get()}/generated/sources/delombok/java/main") {
include(droolsImports)
}
source = documentFiles + mainFiles
setDestinationDir(file(project.findProperty("javadocDestinationDir")?.toString() ?: ""))
options.memberLevel = JavadocMemberLevel.PUBLIC
(options as StandardJavadocDocletOptions).apply {
title = "API Documentation for Redaction Service ${project.version}"
}
}

View File

@ -10,11 +10,13 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.reflections.Reflections;
import org.reflections.scanners.Scanners;
import org.reflections.util.ConfigurationBuilder;
import org.reflections.util.FilterBuilder;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
@ -25,6 +27,8 @@ import lombok.extern.slf4j.Slf4j;
public class DeprecatedElementsFinder {
public static final String PACKAGE_NAME = "com.iqser.red.service.redaction.v1.server";
public static final Pattern DATA_PACKAGE = Pattern.compile(".*/data/.*");
private Set<Method> deprecatedMethods;
@Getter
private Map<String, String> deprecatedMethodsSignaturesMap;
@ -43,7 +47,10 @@ public class DeprecatedElementsFinder {
Reflections reflections = new Reflections(new ConfigurationBuilder().forPackage(PACKAGE_NAME)
.setExpandSuperTypes(true)
.setScanners(Scanners.MethodsAnnotated, Scanners.TypesAnnotated, Scanners.SubTypes));
.setScanners(Scanners.MethodsAnnotated, Scanners.TypesAnnotated, Scanners.SubTypes)
.filterInputsBy(new FilterBuilder().includePackage(PACKAGE_NAME).excludePackage(PACKAGE_NAME + ".data")
// Exclude the generated proto data package
));
deprecatedMethods = reflections.get(Scanners.MethodsAnnotated.with(Deprecated.class).as(Method.class));

View File

@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RectangleWithPage;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -3,7 +3,7 @@ package com.iqser.red.service.redaction.v1.server.model.component;
import java.util.Collection;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -3,6 +3,8 @@ package com.iqser.red.service.redaction.v1.server.model.drools;
import org.drools.drl.ast.descr.AttributeDescr;
import org.drools.drl.ast.descr.RuleDescr;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;

View File

@ -4,6 +4,8 @@ import java.util.List;
import java.util.Objects;
import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleType;
public record RuleClass(RuleType ruleType, List<RuleUnit> ruleUnits) {
public Optional<RuleUnit> findRuleUnitByInteger(Integer unit) {

View File

@ -11,6 +11,8 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.model.DroolsValidation;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleType;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -3,7 +3,6 @@ package com.iqser.red.service.redaction.v1.server.service;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import org.springframework.stereotype.Service;
@ -14,7 +13,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;

View File

@ -19,9 +19,9 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequ
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
@ -31,7 +31,6 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVers
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService;
import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService;
import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter;

View File

@ -9,33 +9,24 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.MessageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
import com.iqser.red.service.persistence.service.v1.api.shared.model.mapper.ImportedLegalBasisMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.service.EntityLogMongoService;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.AnalysisData;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentLogCreatorService;
import com.iqser.red.service.redaction.v1.server.service.drools.ComponentDroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.service.drools.EntityDroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;

View File

@ -12,10 +12,9 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalRe
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FoundTerm;
import com.iqser.red.service.redaction.v1.model.QueueNames;
import com.iqser.red.service.redaction.v1.server.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility;
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.service.document;
package com.iqser.red.service.redaction.v1.server.service;
import static java.lang.String.format;
import static java.util.stream.Collectors.groupingBy;
@ -20,15 +20,17 @@ import org.springframework.stereotype.Service;
import com.google.common.collect.Sets;
import com.iqser.red.service.redaction.v1.server.model.ClosestEntity;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import lombok.extern.slf4j.Slf4j;

View File

@ -11,7 +11,6 @@ import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
@ -207,9 +206,14 @@ public class EntityLogCreatorService {
textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId, analysisNumber, manualChangesMap.getOrDefault(imageNode.getId(), new ArrayList<>()))));
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
dossierTemplateId,
analysisNumber,
manualChangesMap.getOrDefault(imageNode.getId(), new ArrayList<>()))));
notFoundPrecursorEntities.forEach(precursorEntity -> entries.add(createEntityLogEntry(precursorEntity, analysisNumber, manualChangesMap.getOrDefault(precursorEntity.getId(), new ArrayList<>()))));
notFoundPrecursorEntities.forEach(precursorEntity -> entries.add(createEntityLogEntry(precursorEntity,
analysisNumber,
manualChangesMap.getOrDefault(precursorEntity.getId(), new ArrayList<>()))));
return entries;
}
@ -250,7 +254,7 @@ public class EntityLogCreatorService {
.id(image.getId())
.value(image.getValue())
.type(imageType)
.reason(image.buildReasonWithManualChangeDescriptions())
.reason(image.buildReason())
.legalBasis(image.legalBasis())
.matchedRule(image.getMatchedRule().getRuleIdentifier().toString())
.dictionaryEntry(false)
@ -280,7 +284,7 @@ public class EntityLogCreatorService {
return EntityLogEntry.builder()
.id(precursorEntity.getId())
.reason(precursorEntity.buildReasonWithManualChangeDescriptions())
.reason(precursorEntity.buildReason())
.legalBasis(precursorEntity.legalBasis())
.value(precursorEntity.value())
.type(type)
@ -327,7 +331,7 @@ public class EntityLogCreatorService {
List<ManualChange> allManualChanges = ManualChangeFactory.toLocalManualChangeList(entity.getManualOverwrite().getManualChangeLog(), true, analysisNumber);
return EntityLogEntry.builder()
.reason(entity.buildReasonWithManualChangeDescriptions())
.reason(entity.buildReason())
.legalBasis(entity.legalBasis())
.value(entity.getManualOverwrite().getValue()
.orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue()))

View File

@ -24,7 +24,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import lombok.AccessLevel;

View File

@ -20,7 +20,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ChangeFactory;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RectangleWithPage;
import io.micrometer.core.annotation.Timed;
import io.micrometer.observation.annotation.Observed;

View File

@ -23,11 +23,10 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.redaction.v1.model.AnalyzeResponse;
import com.iqser.red.service.redaction.v1.model.QueueNames;
import com.iqser.red.service.redaction.v1.model.UnprocessedManualEntity;
import com.iqser.red.service.redaction.v1.server.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFromPrecursorCreationService;
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -63,7 +62,10 @@ public class UnprocessedChangesService {
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
List<String> annotationIds = analyzeRequest.getManualRedactions().buildAll().stream().map(BaseAnnotation::getAnnotationId).toList();
List<String> annotationIds = analyzeRequest.getManualRedactions().buildAll()
.stream()
.map(BaseAnnotation::getAnnotationId)
.toList();
List<EntityLogEntry> entityLogEntries = redactionStorageService.getEntityLogEntriesById(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), annotationIds);
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
@ -81,8 +83,7 @@ public class UnprocessedChangesService {
List<ManualResizeRedaction> manualResizeRedactions = analyzeRequest.getManualRedactions().getResizeRedactions()
.stream()
.toList();
List<PrecursorEntity> manualEntitiesToBeResized = entityLogEntries
.stream()
List<PrecursorEntity> manualEntitiesToBeResized = entityLogEntries.stream()
.filter(entityLogEntry -> resizeIds.contains(entityLogEntry.getId()))
.filter(entityLogEntry -> entityLogEntry.getEngines().contains(Engine.MANUAL))
.toList()

View File

@ -24,8 +24,8 @@ import org.kie.api.runtime.KieSession;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;

View File

@ -1476,7 +1476,6 @@ public class EntityCreationService {
}
private void addDuplicateEntityToGraph(TextEntity entityToDuplicate, TextRange newTextRange, SemanticNode node) {
entityToDuplicate.addTextRange(newTextRange);

View File

@ -6,9 +6,9 @@ import java.util.Objects;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;

View File

@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.service.EntityFindingUtility;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@ -130,10 +131,8 @@ public class EntityFromPrecursorCreationService {
} else {
String section = precursorEntity.getManualOverwrite().getSection()
.orElse(null);
if ((section == null || section.isBlank())
&& precursorEntity.getSection() != null
&& !precursorEntity.getSection().isBlank()
&& precursorEntity.getEngines().contains(Engine.IMPORTED)) {
if ((section == null || section.isBlank()) && precursorEntity.getSection() != null && !precursorEntity.getSection().isBlank() && precursorEntity.getEngines()
.contains(Engine.IMPORTED)) {
section = precursorEntity.getSection();
}

View File

@ -3,12 +3,13 @@ package com.iqser.red.service.redaction.v1.server.service.document;
import java.util.HashSet;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.AbstractNodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import lombok.Getter;
public class IntersectingNodeVisitor implements NodeVisitor {
public class IntersectingNodeVisitor extends AbstractNodeVisitor {
@Getter
private Set<SemanticNode> intersectingNodes;
@ -23,9 +24,9 @@ public class IntersectingNodeVisitor implements NodeVisitor {
@Override
public void visit(SemanticNode node) {
public void visitNodeDefault(SemanticNode node) {
if (node.getTextRange().intersects(textRange)) {
if (textRange.intersects(node.getTextRange())) {
intersectingNodes.add(node);
}
}

View File

@ -1,9 +0,0 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
/**
 * Callback interface for operations that are applied to {@link SemanticNode} instances.
 * <p>
 * Implementations receive one node per {@link #visit(SemanticNode)} call and may accumulate
 * state across calls (for example, collecting the nodes that match some criterion).
 * NOTE(review): presumably invoked once per node while walking the document tree — the
 * traversal order and the caller are not visible here; confirm against the call sites.
 */
public interface NodeVisitor {
/**
 * Handles a single node.
 *
 * @param node the node being visited
 */
void visit(SemanticNode node);
}

Some files were not shown because too many files have changed in this diff Show More