Compare commits
188 Commits
RED-9859-b
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fdd2b954fe | ||
|
|
2d3a048487 | ||
|
|
518c38c2e9 | ||
|
|
21097a6419 | ||
|
|
c8dd167606 | ||
|
|
9bd5577986 | ||
|
|
c1990ef4aa | ||
|
|
3dfa05bd67 | ||
|
|
22b2a6474b | ||
|
|
cf21b75f2e | ||
|
|
a1e6361c3e | ||
|
|
3c2db795c8 | ||
|
|
ef1810b658 | ||
|
|
26025a5621 | ||
|
|
4fa91a59e0 | ||
|
|
7c37776af4 | ||
|
|
9000f755a3 | ||
|
|
62ec63cc55 | ||
|
|
db59ae014b | ||
|
|
dfd262e9e1 | ||
|
|
4fd36768b2 | ||
|
|
e04c6dadd7 | ||
|
|
213d3bf645 | ||
|
|
66f3f6ce59 | ||
|
|
3f606ad567 | ||
|
|
7b1c6beb11 | ||
|
|
e660184646 | ||
|
|
f1f2d02266 | ||
|
|
947cbe4cd2 | ||
|
|
e8dc37374e | ||
|
|
8769922bf2 | ||
|
|
21f2ded6c6 | ||
|
|
9f20a14aec | ||
|
|
681d6328ef | ||
|
|
97c23c367e | ||
|
|
1b7c59d292 | ||
|
|
f9d939958f | ||
|
|
41f824297c | ||
|
|
68f75f070c | ||
|
|
4c19be01c6 | ||
|
|
98ba463639 | ||
|
|
e415234bf8 | ||
|
|
7f96c7b51e | ||
|
|
a0d3c4cf86 | ||
|
|
d2a768d9f5 | ||
|
|
16a7a8b9f4 | ||
|
|
288e0d3c51 | ||
|
|
c9468f3cf4 | ||
|
|
1abceb2e20 | ||
|
|
f8e2aae6e7 | ||
|
|
569699139f | ||
|
|
45cd7f3d98 | ||
|
|
fa3ba58bd3 | ||
|
|
69f7d688d0 | ||
|
|
76bab106bc | ||
|
|
62e26add99 | ||
|
|
1293484295 | ||
|
|
de0027dd6d | ||
|
|
d73ec58d8a | ||
|
|
9e032cf1a8 | ||
|
|
c2fd73efe7 | ||
|
|
db405278bc | ||
|
|
11a9f2f8aa | ||
|
|
0aea884da2 | ||
|
|
d32c56e101 | ||
|
|
92d03de194 | ||
|
|
6a58bf6d44 | ||
|
|
daed4e07ef | ||
|
|
eceba3a37a | ||
|
|
4b647b23a3 | ||
|
|
dd1b7cf72f | ||
|
|
725e6c1e14 | ||
|
|
ab49e5d296 | ||
|
|
48d14cff8f | ||
|
|
dbe8e08bba | ||
|
|
98456ceb6d | ||
|
|
a7effce48e | ||
|
|
d068160a9d | ||
|
|
f37e49e8bb | ||
|
|
1f3cf8d529 | ||
|
|
b61b89bc5b | ||
|
|
8aa31a18af | ||
|
|
b2a837bc54 | ||
|
|
f93e59e29e | ||
|
|
b8612d9b95 | ||
|
|
1683745dc7 | ||
|
|
ec50eca15b | ||
|
|
3ff541fee6 | ||
|
|
07b9b8bf8a | ||
|
|
3c165070ee | ||
|
|
ecd57e17a2 | ||
|
|
9fc065518d | ||
|
|
3c3c029cf4 | ||
|
|
d767966056 | ||
|
|
bcbd4587f1 | ||
|
|
03e321a824 | ||
|
|
316b4c1d02 | ||
|
|
3e0f2254ed | ||
|
|
cd2bda15aa | ||
|
|
895bc56590 | ||
|
|
292869c502 | ||
|
|
f3cdf46008 | ||
|
|
795f8fd31b | ||
|
|
403e2f4153 | ||
|
|
5470157468 | ||
|
|
80aaeea8dd | ||
|
|
5a5f14127b | ||
|
|
bc49cc6e8d | ||
|
|
59103d3075 | ||
|
|
6cf17ef4f3 | ||
|
|
40d832fc3a | ||
|
|
b9240dec68 | ||
|
|
ea7137b242 | ||
|
|
8375783ce8 | ||
|
|
ebee9f04bd | ||
|
|
4dc0a1fbdc | ||
|
|
5ebe82b7ce | ||
|
|
d1c2d43ffb | ||
|
|
004f6cb5f9 | ||
|
|
4fc24fdfe3 | ||
|
|
9d668f9be1 | ||
|
|
4184333506 | ||
|
|
062d29c2ea | ||
|
|
dafecc35b6 | ||
|
|
5327591d54 | ||
|
|
581e367b6e | ||
|
|
b58985fa8a | ||
|
|
cdfa1afcd0 | ||
|
|
78990b5555 | ||
|
|
2dada39717 | ||
|
|
66647e45b0 | ||
|
|
2ae8b0fbce | ||
|
|
00409584ee | ||
|
|
8fc1f77688 | ||
|
|
594acb4e82 | ||
|
|
f9c77a3695 | ||
|
|
30cd60f702 | ||
|
|
6f361f0deb | ||
|
|
f629448e4b | ||
|
|
1b67ea1068 | ||
|
|
9dbf5c479f | ||
|
|
c760733e41 | ||
|
|
99182fba23 | ||
|
|
ac5a7a73b1 | ||
|
|
9d093addaf | ||
|
|
56cabd54a2 | ||
|
|
7edf00f014 | ||
|
|
3a5072d7af | ||
|
|
02abe895a1 | ||
|
|
c29329e3df | ||
|
|
b8cf0ab005 | ||
|
|
5a4d60eb03 | ||
|
|
7bc76d9306 | ||
|
|
c351a71831 | ||
|
|
1d135a26fa | ||
|
|
5e8dc747bb | ||
|
|
5ff3ebc6cb | ||
|
|
4aac5fb71a | ||
|
|
ae8b82245f | ||
|
|
5abe73cae2 | ||
|
|
b6d1edacba | ||
|
|
80bc8242d0 | ||
|
|
2ab536dfee | ||
|
|
e09cd59256 | ||
|
|
3fd98c67f3 | ||
|
|
1800a89b39 | ||
|
|
229756d185 | ||
|
|
a7d9a37be9 | ||
|
|
71b2dda7cd | ||
|
|
fe7b43c32a | ||
|
|
09e84cdb75 | ||
|
|
776a24306f | ||
|
|
3e75f3e5e7 | ||
|
|
1c81212a2d | ||
|
|
c279f54295 | ||
|
|
5f52df1ebe | ||
|
|
1b701a67c0 | ||
|
|
5cb81e7f8b | ||
|
|
c869aab13b | ||
|
|
a58bcedccf | ||
|
|
9e45864b35 | ||
|
|
67657e91b5 | ||
|
|
c1a2e9dee2 | ||
|
|
0a5cc0140f | ||
|
|
e9fdedc684 | ||
|
|
d9a55638e7 | ||
|
|
990c09cfad | ||
|
|
f3ae68a8da |
@ -7,20 +7,25 @@ include:
|
||||
ref: 'main'
|
||||
file: 'ci-templates/gradle_java.yml'
|
||||
|
||||
deploy JavaDoc:
|
||||
publish dependencies:
|
||||
stage: deploy
|
||||
tags:
|
||||
- dind
|
||||
script:
|
||||
- echo "Building JavaDoc with gradle version ${BUILDVERSION}"
|
||||
- echo "Publishing dependencies with gradle version ${BUILDVERSION}"
|
||||
- gradle -Pversion=${BUILDVERSION} publish
|
||||
- echo "BUILDVERSION=$(echo ${BUILDVERSION})" >> variables.env
|
||||
artifacts:
|
||||
reports:
|
||||
dotenv: variables.env
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
- if: $CI_COMMIT_BRANCH =~ /^release/
|
||||
- if: $CI_COMMIT_BRANCH =~ /^feature/
|
||||
- if: $CI_COMMIT_TAG
|
||||
|
||||
generateJavaDoc:
|
||||
stage: build
|
||||
generate JavaDoc:
|
||||
stage: deploy
|
||||
tags:
|
||||
- dind
|
||||
script:
|
||||
@ -35,14 +40,39 @@ generateJavaDoc:
|
||||
- if: $CI_COMMIT_TAG
|
||||
|
||||
pages:
|
||||
stage: build
|
||||
stage: deploy
|
||||
needs:
|
||||
- generateJavaDoc
|
||||
- generate JavaDoc
|
||||
- publish dependencies
|
||||
- calculate minor version
|
||||
pages:
|
||||
path_prefix: "$BUILDVERSION"
|
||||
script:
|
||||
- mkdir public
|
||||
- mv redaction-service-v1/redaction-service-server-v1/javadoc/* public/
|
||||
- URL=$(echo $BUILDVERSION | sed -e 's|\.|-|g')
|
||||
- echo "Deploying to ${CI_PAGES_URL}/${URL}"
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
artifacts:
|
||||
paths:
|
||||
- public
|
||||
|
||||
# Manually started job that uploads the generated JavaDoc to the
# "azurejavadocs" rclone remote, under javadoc/<VERSION_NAME>/ in the container
# named by RCLONE_CONFIG_AZUREJAVADOCS_CONTAINER.
publish JavaDoc to azure:
  image: rclone/rclone:1.67.0
  tags:
    - dind
  stage: deploy
  when: manual
  variables:
    # Folder name the docs are published under.
    VERSION_NAME: "latest"
  needs:
    - generate JavaDoc
  script:
    - echo "Deploy JavaDoc with version ${VERSION_NAME} to prod"
    # Remove the files of the previous upload first. The path must match the
    # destination of the copy below (including the "javadoc/" prefix);
    # otherwise stale pages from an earlier upload are never cleaned up.
    - rclone delete azurejavadocs:/$RCLONE_CONFIG_AZUREJAVADOCS_CONTAINER/javadoc/${VERSION_NAME}
    - rclone copy redaction-service-v1/redaction-service-server-v1/javadoc/ azurejavadocs:/$RCLONE_CONFIG_AZUREJAVADOCS_CONTAINER/javadoc/${VERSION_NAME}/
  rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    - if: $CI_COMMIT_BRANCH =~ /^release/
    - if: $CI_COMMIT_TAG
|
||||
|
||||
@ -15,8 +15,13 @@ pmd {
|
||||
isConsoleOutput = true
|
||||
}
|
||||
|
||||
tasks.checkstyleMain {
|
||||
exclude("**/data/**") // ignore generated proto files
|
||||
}
|
||||
|
||||
tasks.pmdMain {
|
||||
pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
|
||||
exclude("**/data/**") // ignore generated proto files
|
||||
}
|
||||
|
||||
tasks.pmdTest {
|
||||
@ -28,6 +33,8 @@ tasks.named<Test>("test") {
|
||||
reports {
|
||||
junitXml.outputLocation.set(layout.buildDirectory.dir("reports/junit"))
|
||||
}
|
||||
minHeapSize = "512m"
|
||||
maxHeapSize = "2048m"
|
||||
}
|
||||
|
||||
tasks.test {
|
||||
|
||||
@ -9,11 +9,14 @@ gradle assemble
|
||||
# Get the current Git branch
|
||||
branch=$(git rev-parse --abbrev-ref HEAD)
|
||||
|
||||
# Replace any slashes (e.g., in 'feature/' or 'release/') with an underscore
|
||||
cleaned_branch=$(echo "$branch" | sed 's/\//_/g')
|
||||
|
||||
# Get the short commit hash (first 5 characters)
|
||||
commit_hash=$(git rev-parse --short=5 HEAD)
|
||||
|
||||
# Combine branch and commit hash
|
||||
buildName="${USER}-${branch}-${commit_hash}"
|
||||
buildName="${USER}-${cleaned_branch}-${commit_hash}"
|
||||
|
||||
gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=${buildName}
|
||||
|
||||
|
||||
35
redaction-service-v1/document/build.gradle.kts
Normal file
35
redaction-service-v1/document/build.gradle.kts
Normal file
@ -0,0 +1,35 @@
|
||||
// Build script for the "redaction-service-document" module: applies the shared
// Java conventions plugin and Lombok, declares the module's dependencies and
// configures Maven publishing to the Nexus releases repository.
plugins {
    id("com.iqser.red.service.java-conventions")
    id("io.freefair.lombok") version "8.4"
}

description = "redaction-service-document"
group = "com.knecon.fforesight"

val persistenceServiceVersion = "2.612.0-RED10072.1"
// NOTE(review): not referenced anywhere in this script — confirm whether it is still needed.
val layoutParserVersion = "newNode"

dependencies {
    implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}")
    // Exposed as "api" so that consumers of this module get the protobuf types transitively.
    api("com.google.protobuf:protobuf-java-util:4.28.3")

    testImplementation("org.junit.jupiter:junit-jupiter-api:5.8.1")
    testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.8.1")
}

publishing {
    publications {
        // NOTE(review): `name` presumably resolves to the project's name here — confirm.
        create<MavenPublication>(name) {
            from(components["java"])
        }
    }
    repositories {
        maven {
            url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
            credentials {
                // Credentials come from the Gradle properties "mavenUser" / "mavenPassword";
                // both stay null when the property is not set.
                username = providers.gradleProperty("mavenUser").orNull
                password = providers.gradleProperty("mavenPassword").orNull
            }
        }
    }
}
|
||||
@ -0,0 +1,36 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.AllDocumentPages;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentData implements Serializable {
|
||||
|
||||
AllDocumentPages documentPages;
|
||||
AllDocumentTextData documentTextData;
|
||||
AllDocumentPositionData documentPositionData;
|
||||
DocumentStructureWrapper documentStructureWrapper;
|
||||
|
||||
|
||||
public DocumentStructure getDocumentStructure() {
|
||||
|
||||
return documentStructureWrapper.getDocumentStructure();
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,694 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// NO CHECKED-IN PROTOBUF GENCODE
|
||||
// source: DocumentStructure.proto
|
||||
// Protobuf Java Version: 4.28.3
|
||||
|
||||
package com.iqser.red.service.redaction.v1.server.data;
|
||||
|
||||
public final class DocumentStructureProto {
|
||||
private DocumentStructureProto() {}
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
DocumentStructureProto.class.getName());
|
||||
}
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
}
|
||||
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistry registry) {
|
||||
registerAllExtensions(
|
||||
(com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
public interface DocumentStructureOrBuilder extends
|
||||
// @@protoc_insertion_point(interface_extends:DocumentStructure)
|
||||
com.google.protobuf.MessageOrBuilder {
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
* @return Whether the root field is set.
|
||||
*/
|
||||
boolean hasRoot();
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
* @return The root.
|
||||
*/
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot();
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder();
|
||||
}
|
||||
/**
|
||||
* Protobuf type {@code DocumentStructure}
|
||||
*/
|
||||
public static final class DocumentStructure extends
|
||||
com.google.protobuf.GeneratedMessage implements
|
||||
// @@protoc_insertion_point(message_implements:DocumentStructure)
|
||||
DocumentStructureOrBuilder {
|
||||
private static final long serialVersionUID = 0L;
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
DocumentStructure.class.getName());
|
||||
}
|
||||
// Use DocumentStructure.newBuilder() to construct.
|
||||
private DocumentStructure(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
|
||||
super(builder);
|
||||
}
|
||||
private DocumentStructure() {
|
||||
}
|
||||
|
||||
public static final com.google.protobuf.Descriptors.Descriptor
|
||||
getDescriptor() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
|
||||
internalGetFieldAccessorTable() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_fieldAccessorTable
|
||||
.ensureFieldAccessorsInitialized(
|
||||
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.class, com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.Builder.class);
|
||||
}
|
||||
|
||||
private int bitField0_;
|
||||
public static final int ROOT_FIELD_NUMBER = 1;
|
||||
private com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData root_;
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
* @return Whether the root field is set.
|
||||
*/
|
||||
@java.lang.Override
|
||||
public boolean hasRoot() {
|
||||
return ((bitField0_ & 0x00000001) != 0);
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
* @return The root.
|
||||
*/
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot() {
|
||||
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder() {
|
||||
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
|
||||
}
|
||||
|
||||
private byte memoizedIsInitialized = -1;
|
||||
@java.lang.Override
|
||||
public final boolean isInitialized() {
|
||||
byte isInitialized = memoizedIsInitialized;
|
||||
if (isInitialized == 1) return true;
|
||||
if (isInitialized == 0) return false;
|
||||
|
||||
memoizedIsInitialized = 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public void writeTo(com.google.protobuf.CodedOutputStream output)
|
||||
throws java.io.IOException {
|
||||
if (((bitField0_ & 0x00000001) != 0)) {
|
||||
output.writeMessage(1, getRoot());
|
||||
}
|
||||
getUnknownFields().writeTo(output);
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public int getSerializedSize() {
|
||||
int size = memoizedSize;
|
||||
if (size != -1) return size;
|
||||
|
||||
size = 0;
|
||||
if (((bitField0_ & 0x00000001) != 0)) {
|
||||
size += com.google.protobuf.CodedOutputStream
|
||||
.computeMessageSize(1, getRoot());
|
||||
}
|
||||
size += getUnknownFields().getSerializedSize();
|
||||
memoizedSize = size;
|
||||
return size;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public boolean equals(final java.lang.Object obj) {
|
||||
if (obj == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure)) {
|
||||
return super.equals(obj);
|
||||
}
|
||||
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure other = (com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure) obj;
|
||||
|
||||
if (hasRoot() != other.hasRoot()) return false;
|
||||
if (hasRoot()) {
|
||||
if (!getRoot()
|
||||
.equals(other.getRoot())) return false;
|
||||
}
|
||||
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public int hashCode() {
|
||||
if (memoizedHashCode != 0) {
|
||||
return memoizedHashCode;
|
||||
}
|
||||
int hash = 41;
|
||||
hash = (19 * hash) + getDescriptor().hashCode();
|
||||
if (hasRoot()) {
|
||||
hash = (37 * hash) + ROOT_FIELD_NUMBER;
|
||||
hash = (53 * hash) + getRoot().hashCode();
|
||||
}
|
||||
hash = (29 * hash) + getUnknownFields().hashCode();
|
||||
memoizedHashCode = hash;
|
||||
return hash;
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
java.nio.ByteBuffer data)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
java.nio.ByteBuffer data,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
com.google.protobuf.ByteString data)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
com.google.protobuf.ByteString data,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(byte[] data)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
byte[] data,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(java.io.InputStream input)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
java.io.InputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input, extensionRegistry);
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseDelimitedFrom(java.io.InputStream input)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseDelimitedWithIOException(PARSER, input);
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseDelimitedFrom(
|
||||
java.io.InputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
com.google.protobuf.CodedInputStream input)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
|
||||
com.google.protobuf.CodedInputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input, extensionRegistry);
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public Builder newBuilderForType() { return newBuilder(); }
|
||||
public static Builder newBuilder() {
|
||||
return DEFAULT_INSTANCE.toBuilder();
|
||||
}
|
||||
public static Builder newBuilder(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure prototype) {
|
||||
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
|
||||
}
|
||||
@java.lang.Override
|
||||
public Builder toBuilder() {
|
||||
return this == DEFAULT_INSTANCE
|
||||
? new Builder() : new Builder().mergeFrom(this);
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
protected Builder newBuilderForType(
|
||||
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
|
||||
Builder builder = new Builder(parent);
|
||||
return builder;
|
||||
}
|
||||
/**
|
||||
* Protobuf type {@code DocumentStructure}
|
||||
*/
|
||||
public static final class Builder extends
|
||||
com.google.protobuf.GeneratedMessage.Builder<Builder> implements
|
||||
// @@protoc_insertion_point(builder_implements:DocumentStructure)
|
||||
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructureOrBuilder {
|
||||
public static final com.google.protobuf.Descriptors.Descriptor
|
||||
getDescriptor() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
|
||||
internalGetFieldAccessorTable() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_fieldAccessorTable
|
||||
.ensureFieldAccessorsInitialized(
|
||||
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.class, com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.Builder.class);
|
||||
}
|
||||
|
||||
// Construct using com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.newBuilder()
|
||||
private Builder() {
|
||||
maybeForceBuilderInitialization();
|
||||
}
|
||||
|
||||
private Builder(
|
||||
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
|
||||
super(parent);
|
||||
maybeForceBuilderInitialization();
|
||||
}
|
||||
private void maybeForceBuilderInitialization() {
|
||||
if (com.google.protobuf.GeneratedMessage
|
||||
.alwaysUseFieldBuilders) {
|
||||
getRootFieldBuilder();
|
||||
}
|
||||
}
|
||||
@java.lang.Override
|
||||
public Builder clear() {
|
||||
super.clear();
|
||||
bitField0_ = 0;
|
||||
root_ = null;
|
||||
if (rootBuilder_ != null) {
|
||||
rootBuilder_.dispose();
|
||||
rootBuilder_ = null;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.google.protobuf.Descriptors.Descriptor
|
||||
getDescriptorForType() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstanceForType() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.getDefaultInstance();
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure build() {
|
||||
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result = buildPartial();
|
||||
if (!result.isInitialized()) {
|
||||
throw newUninitializedMessageException(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure buildPartial() {
|
||||
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result = new com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure(this);
|
||||
if (bitField0_ != 0) { buildPartial0(result); }
|
||||
onBuilt();
|
||||
return result;
|
||||
}
|
||||
|
||||
private void buildPartial0(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result) {
|
||||
int from_bitField0_ = bitField0_;
|
||||
int to_bitField0_ = 0;
|
||||
if (((from_bitField0_ & 0x00000001) != 0)) {
|
||||
result.root_ = rootBuilder_ == null
|
||||
? root_
|
||||
: rootBuilder_.build();
|
||||
to_bitField0_ |= 0x00000001;
|
||||
}
|
||||
result.bitField0_ |= to_bitField0_;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public Builder mergeFrom(com.google.protobuf.Message other) {
|
||||
if (other instanceof com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure) {
|
||||
return mergeFrom((com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure)other);
|
||||
} else {
|
||||
super.mergeFrom(other);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public Builder mergeFrom(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure other) {
|
||||
if (other == com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.getDefaultInstance()) return this;
|
||||
if (other.hasRoot()) {
|
||||
mergeRoot(other.getRoot());
|
||||
}
|
||||
this.mergeUnknownFields(other.getUnknownFields());
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public final boolean isInitialized() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public Builder mergeFrom(
|
||||
com.google.protobuf.CodedInputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
if (extensionRegistry == null) {
|
||||
throw new java.lang.NullPointerException();
|
||||
}
|
||||
try {
|
||||
boolean done = false;
|
||||
while (!done) {
|
||||
int tag = input.readTag();
|
||||
switch (tag) {
|
||||
case 0:
|
||||
done = true;
|
||||
break;
|
||||
case 10: {
|
||||
input.readMessage(
|
||||
getRootFieldBuilder().getBuilder(),
|
||||
extensionRegistry);
|
||||
bitField0_ |= 0x00000001;
|
||||
break;
|
||||
} // case 10
|
||||
default: {
|
||||
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
|
||||
done = true; // was an endgroup tag
|
||||
}
|
||||
break;
|
||||
} // default:
|
||||
} // switch (tag)
|
||||
} // while (!done)
|
||||
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
|
||||
throw e.unwrapIOException();
|
||||
} finally {
|
||||
onChanged();
|
||||
} // finally
|
||||
return this;
|
||||
}
|
||||
private int bitField0_;
|
||||
|
||||
private com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData root_;
|
||||
private com.google.protobuf.SingleFieldBuilder<
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder> rootBuilder_;
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
* @return Whether the root field is set.
|
||||
*/
|
||||
public boolean hasRoot() {
|
||||
return ((bitField0_ & 0x00000001) != 0);
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
* @return The root.
|
||||
*/
|
||||
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot() {
|
||||
if (rootBuilder_ == null) {
|
||||
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
|
||||
} else {
|
||||
return rootBuilder_.getMessage();
|
||||
}
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
public Builder setRoot(com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData value) {
|
||||
if (rootBuilder_ == null) {
|
||||
if (value == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
root_ = value;
|
||||
} else {
|
||||
rootBuilder_.setMessage(value);
|
||||
}
|
||||
bitField0_ |= 0x00000001;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
public Builder setRoot(
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder builderForValue) {
|
||||
if (rootBuilder_ == null) {
|
||||
root_ = builderForValue.build();
|
||||
} else {
|
||||
rootBuilder_.setMessage(builderForValue.build());
|
||||
}
|
||||
bitField0_ |= 0x00000001;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
public Builder mergeRoot(com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData value) {
|
||||
if (rootBuilder_ == null) {
|
||||
if (((bitField0_ & 0x00000001) != 0) &&
|
||||
root_ != null &&
|
||||
root_ != com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance()) {
|
||||
getRootBuilder().mergeFrom(value);
|
||||
} else {
|
||||
root_ = value;
|
||||
}
|
||||
} else {
|
||||
rootBuilder_.mergeFrom(value);
|
||||
}
|
||||
if (root_ != null) {
|
||||
bitField0_ |= 0x00000001;
|
||||
onChanged();
|
||||
}
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
public Builder clearRoot() {
|
||||
bitField0_ = (bitField0_ & ~0x00000001);
|
||||
root_ = null;
|
||||
if (rootBuilder_ != null) {
|
||||
rootBuilder_.dispose();
|
||||
rootBuilder_ = null;
|
||||
}
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder getRootBuilder() {
|
||||
bitField0_ |= 0x00000001;
|
||||
onChanged();
|
||||
return getRootFieldBuilder().getBuilder();
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder() {
|
||||
if (rootBuilder_ != null) {
|
||||
return rootBuilder_.getMessageOrBuilder();
|
||||
} else {
|
||||
return root_ == null ?
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* The root EntryData represents the Document.
|
||||
* </pre>
|
||||
*
|
||||
* <code>.EntryData root = 1;</code>
|
||||
*/
|
||||
private com.google.protobuf.SingleFieldBuilder<
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder>
|
||||
getRootFieldBuilder() {
|
||||
if (rootBuilder_ == null) {
|
||||
rootBuilder_ = new com.google.protobuf.SingleFieldBuilder<
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder>(
|
||||
getRoot(),
|
||||
getParentForChildren(),
|
||||
isClean());
|
||||
root_ = null;
|
||||
}
|
||||
return rootBuilder_;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(builder_scope:DocumentStructure)
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(class_scope:DocumentStructure)
|
||||
private static final com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure DEFAULT_INSTANCE;
|
||||
static {
|
||||
DEFAULT_INSTANCE = new com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure();
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstance() {
|
||||
return DEFAULT_INSTANCE;
|
||||
}
|
||||
|
||||
private static final com.google.protobuf.Parser<DocumentStructure>
|
||||
PARSER = new com.google.protobuf.AbstractParser<DocumentStructure>() {
|
||||
@java.lang.Override
|
||||
public DocumentStructure parsePartialFrom(
|
||||
com.google.protobuf.CodedInputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
Builder builder = newBuilder();
|
||||
try {
|
||||
builder.mergeFrom(input, extensionRegistry);
|
||||
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
|
||||
throw e.setUnfinishedMessage(builder.buildPartial());
|
||||
} catch (com.google.protobuf.UninitializedMessageException e) {
|
||||
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
|
||||
} catch (java.io.IOException e) {
|
||||
throw new com.google.protobuf.InvalidProtocolBufferException(e)
|
||||
.setUnfinishedMessage(builder.buildPartial());
|
||||
}
|
||||
return builder.buildPartial();
|
||||
}
|
||||
};
|
||||
|
||||
public static com.google.protobuf.Parser<DocumentStructure> parser() {
|
||||
return PARSER;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.google.protobuf.Parser<DocumentStructure> getParserForType() {
|
||||
return PARSER;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstanceForType() {
|
||||
return DEFAULT_INSTANCE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static final com.google.protobuf.Descriptors.Descriptor
|
||||
internal_static_DocumentStructure_descriptor;
|
||||
private static final
|
||||
com.google.protobuf.GeneratedMessage.FieldAccessorTable
|
||||
internal_static_DocumentStructure_fieldAccessorTable;
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor
|
||||
getDescriptor() {
|
||||
return descriptor;
|
||||
}
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor
|
||||
descriptor;
|
||||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\027DocumentStructure.proto\032\017EntryData.pro" +
|
||||
"to\"-\n\021DocumentStructure\022\030\n\004root\030\001 \001(\0132\n." +
|
||||
"EntryDataBH\n.com.iqser.red.service.redac" +
|
||||
"tion.v1.server.dataB\026DocumentStructurePr" +
|
||||
"otob\006proto3"
|
||||
};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor
|
||||
.internalBuildGeneratedFileFrom(descriptorData,
|
||||
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.getDescriptor(),
|
||||
});
|
||||
internal_static_DocumentStructure_descriptor =
|
||||
getDescriptor().getMessageTypes().get(0);
|
||||
internal_static_DocumentStructure_fieldAccessorTable = new
|
||||
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
|
||||
internal_static_DocumentStructure_descriptor,
|
||||
new java.lang.String[] { "Root", });
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.getDescriptor();
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
}
|
||||
@ -0,0 +1,115 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
|
||||
import static com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
public class DocumentStructureWrapper implements Serializable {
|
||||
|
||||
private final DocumentStructure documentStructure;
|
||||
|
||||
public static class TableProperties implements Serializable {
|
||||
|
||||
public static final String NUMBER_OF_ROWS = "numberOfRows";
|
||||
public static final String NUMBER_OF_COLS = "numberOfCols";
|
||||
|
||||
}
|
||||
|
||||
public static class ImageProperties implements Serializable {
|
||||
|
||||
public static final String TRANSPARENT = "transparent";
|
||||
public static final String IMAGE_TYPE = "imageType";
|
||||
public static final String POSITION = "position";
|
||||
public static final String ID = "id";
|
||||
|
||||
public static final String REPRESENTATION_HASH = "representationHash";
|
||||
|
||||
}
|
||||
|
||||
public static class TableCellProperties implements Serializable {
|
||||
|
||||
public static final String B_BOX = "bBox";
|
||||
public static final String ROW = "row";
|
||||
public static final String COL = "col";
|
||||
public static final String HEADER = "header";
|
||||
|
||||
}
|
||||
|
||||
public static class DuplicateParagraphProperties implements Serializable {
|
||||
|
||||
public static final String UNSORTED_TEXTBLOCK_ID = "utbid";
|
||||
|
||||
}
|
||||
|
||||
public static final String RECTANGLE_DELIMITER = ";";
|
||||
|
||||
|
||||
public static Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
|
||||
.map(Float::parseFloat)
|
||||
.toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
}
|
||||
|
||||
|
||||
public static double[] parseRepresentationVector(String representationHash) {
|
||||
|
||||
String[] stringArray = representationHash.split("[,\\s]+");
|
||||
double[] doubleArray = new double[stringArray.length];
|
||||
for (int i = 0; i < stringArray.length; i++) {
|
||||
doubleArray[i] = Double.parseDouble(stringArray[i]);
|
||||
}
|
||||
|
||||
return doubleArray;
|
||||
}
|
||||
|
||||
|
||||
public EntryData get(List<Integer> tocId) {
|
||||
|
||||
if (tocId.isEmpty()) {
|
||||
return documentStructure.getRoot();
|
||||
}
|
||||
EntryData entry = documentStructure.getRoot().getChildrenList()
|
||||
.get(tocId.get(0));
|
||||
for (int id : tocId.subList(1, tocId.size())) {
|
||||
entry = entry.getChildrenList()
|
||||
.get(id);
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
|
||||
public Stream<EntryData> streamAllEntries() {
|
||||
|
||||
return flatten(documentStructure.getRoot());
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
return String.join("\n",
|
||||
streamAllEntries().map(EntryData::toString)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Stream<EntryData> flatten(EntryData entry) {
|
||||
|
||||
return Stream.concat(Stream.of(entry),
|
||||
entry.getChildrenList()
|
||||
.stream()
|
||||
.flatMap(DocumentStructureWrapper::flatten));
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,176 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// NO CHECKED-IN PROTOBUF GENCODE
|
||||
// source: LayoutEngine.proto
|
||||
// Protobuf Java Version: 4.28.3
|
||||
|
||||
package com.iqser.red.service.redaction.v1.server.data;
|
||||
|
||||
public final class LayoutEngineProto {
|
||||
private LayoutEngineProto() {}
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
LayoutEngineProto.class.getName());
|
||||
}
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
}
|
||||
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistry registry) {
|
||||
registerAllExtensions(
|
||||
(com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
/**
|
||||
* Protobuf enum {@code LayoutEngine}
|
||||
*/
|
||||
public enum LayoutEngine
|
||||
implements com.google.protobuf.ProtocolMessageEnum {
|
||||
/**
|
||||
* <code>ALGORITHM = 0;</code>
|
||||
*/
|
||||
ALGORITHM(0),
|
||||
/**
|
||||
* <code>AI = 1;</code>
|
||||
*/
|
||||
AI(1),
|
||||
/**
|
||||
* <code>OUTLINE = 2;</code>
|
||||
*/
|
||||
OUTLINE(2),
|
||||
UNRECOGNIZED(-1),
|
||||
;
|
||||
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
LayoutEngine.class.getName());
|
||||
}
|
||||
/**
|
||||
* <code>ALGORITHM = 0;</code>
|
||||
*/
|
||||
public static final int ALGORITHM_VALUE = 0;
|
||||
/**
|
||||
* <code>AI = 1;</code>
|
||||
*/
|
||||
public static final int AI_VALUE = 1;
|
||||
/**
|
||||
* <code>OUTLINE = 2;</code>
|
||||
*/
|
||||
public static final int OUTLINE_VALUE = 2;
|
||||
|
||||
|
||||
public final int getNumber() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"Can't get the number of an unknown enum value.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
* @deprecated Use {@link #forNumber(int)} instead.
|
||||
*/
|
||||
@java.lang.Deprecated
|
||||
public static LayoutEngine valueOf(int value) {
|
||||
return forNumber(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
*/
|
||||
public static LayoutEngine forNumber(int value) {
|
||||
switch (value) {
|
||||
case 0: return ALGORITHM;
|
||||
case 1: return AI;
|
||||
case 2: return OUTLINE;
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>
|
||||
internalGetValueMap() {
|
||||
return internalValueMap;
|
||||
}
|
||||
private static final com.google.protobuf.Internal.EnumLiteMap<
|
||||
LayoutEngine> internalValueMap =
|
||||
new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
|
||||
public LayoutEngine findValueByNumber(int number) {
|
||||
return LayoutEngine.forNumber(number);
|
||||
}
|
||||
};
|
||||
|
||||
public final com.google.protobuf.Descriptors.EnumValueDescriptor
|
||||
getValueDescriptor() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalStateException(
|
||||
"Can't get the descriptor of an unrecognized enum value.");
|
||||
}
|
||||
return getDescriptor().getValues().get(ordinal());
|
||||
}
|
||||
public final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptorForType() {
|
||||
return getDescriptor();
|
||||
}
|
||||
public static final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptor() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto.getDescriptor().getEnumTypes().get(0);
|
||||
}
|
||||
|
||||
private static final LayoutEngine[] VALUES = values();
|
||||
|
||||
public static LayoutEngine valueOf(
|
||||
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
|
||||
if (desc.getType() != getDescriptor()) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"EnumValueDescriptor is not for this type.");
|
||||
}
|
||||
if (desc.getIndex() == -1) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
return VALUES[desc.getIndex()];
|
||||
}
|
||||
|
||||
private final int value;
|
||||
|
||||
private LayoutEngine(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(enum_scope:LayoutEngine)
|
||||
}
|
||||
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor
|
||||
getDescriptor() {
|
||||
return descriptor;
|
||||
}
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor
|
||||
descriptor;
|
||||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" +
|
||||
"ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002BC\n.com." +
|
||||
"iqser.red.service.redaction.v1.server.da" +
|
||||
"taB\021LayoutEngineProtob\006proto3"
|
||||
};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor
|
||||
.internalBuildGeneratedFileFrom(descriptorData,
|
||||
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
||||
});
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
}
|
||||
@ -0,0 +1,261 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// NO CHECKED-IN PROTOBUF GENCODE
|
||||
// source: NodeType.proto
|
||||
// Protobuf Java Version: 4.28.3
|
||||
|
||||
package com.iqser.red.service.redaction.v1.server.data;
|
||||
|
||||
public final class NodeTypeProto {
|
||||
private NodeTypeProto() {}
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
NodeTypeProto.class.getName());
|
||||
}
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
}
|
||||
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistry registry) {
|
||||
registerAllExtensions(
|
||||
(com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
/**
|
||||
* Protobuf enum {@code NodeType}
|
||||
*/
|
||||
public enum NodeType
|
||||
implements com.google.protobuf.ProtocolMessageEnum {
|
||||
/**
|
||||
* <code>DOCUMENT = 0;</code>
|
||||
*/
|
||||
DOCUMENT(0),
|
||||
/**
|
||||
* <code>SECTION = 1;</code>
|
||||
*/
|
||||
SECTION(1),
|
||||
/**
|
||||
* <code>SUPER_SECTION = 2;</code>
|
||||
*/
|
||||
SUPER_SECTION(2),
|
||||
/**
|
||||
* <code>HEADLINE = 3;</code>
|
||||
*/
|
||||
HEADLINE(3),
|
||||
/**
|
||||
* <code>PARAGRAPH = 4;</code>
|
||||
*/
|
||||
PARAGRAPH(4),
|
||||
/**
|
||||
* <code>TABLE = 5;</code>
|
||||
*/
|
||||
TABLE(5),
|
||||
/**
|
||||
* <code>TABLE_CELL = 6;</code>
|
||||
*/
|
||||
TABLE_CELL(6),
|
||||
/**
|
||||
* <code>IMAGE = 7;</code>
|
||||
*/
|
||||
IMAGE(7),
|
||||
/**
|
||||
* <code>HEADER = 8;</code>
|
||||
*/
|
||||
HEADER(8),
|
||||
/**
|
||||
* <code>FOOTER = 9;</code>
|
||||
*/
|
||||
FOOTER(9),
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS = 10;</code>
|
||||
*/
|
||||
TABLE_OF_CONTENTS(10),
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
|
||||
*/
|
||||
TABLE_OF_CONTENTS_ITEM(11),
|
||||
UNRECOGNIZED(-1),
|
||||
;
|
||||
|
||||
static {
|
||||
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
|
||||
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
|
||||
/* major= */ 4,
|
||||
/* minor= */ 28,
|
||||
/* patch= */ 3,
|
||||
/* suffix= */ "",
|
||||
NodeType.class.getName());
|
||||
}
|
||||
/**
|
||||
* <code>DOCUMENT = 0;</code>
|
||||
*/
|
||||
public static final int DOCUMENT_VALUE = 0;
|
||||
/**
|
||||
* <code>SECTION = 1;</code>
|
||||
*/
|
||||
public static final int SECTION_VALUE = 1;
|
||||
/**
|
||||
* <code>SUPER_SECTION = 2;</code>
|
||||
*/
|
||||
public static final int SUPER_SECTION_VALUE = 2;
|
||||
/**
|
||||
* <code>HEADLINE = 3;</code>
|
||||
*/
|
||||
public static final int HEADLINE_VALUE = 3;
|
||||
/**
|
||||
* <code>PARAGRAPH = 4;</code>
|
||||
*/
|
||||
public static final int PARAGRAPH_VALUE = 4;
|
||||
/**
|
||||
* <code>TABLE = 5;</code>
|
||||
*/
|
||||
public static final int TABLE_VALUE = 5;
|
||||
/**
|
||||
* <code>TABLE_CELL = 6;</code>
|
||||
*/
|
||||
public static final int TABLE_CELL_VALUE = 6;
|
||||
/**
|
||||
* <code>IMAGE = 7;</code>
|
||||
*/
|
||||
public static final int IMAGE_VALUE = 7;
|
||||
/**
|
||||
* <code>HEADER = 8;</code>
|
||||
*/
|
||||
public static final int HEADER_VALUE = 8;
|
||||
/**
|
||||
* <code>FOOTER = 9;</code>
|
||||
*/
|
||||
public static final int FOOTER_VALUE = 9;
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS = 10;</code>
|
||||
*/
|
||||
public static final int TABLE_OF_CONTENTS_VALUE = 10;
|
||||
/**
|
||||
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
|
||||
*/
|
||||
public static final int TABLE_OF_CONTENTS_ITEM_VALUE = 11;
|
||||
|
||||
|
||||
public final int getNumber() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"Can't get the number of an unknown enum value.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
* @deprecated Use {@link #forNumber(int)} instead.
|
||||
*/
|
||||
@java.lang.Deprecated
|
||||
public static NodeType valueOf(int value) {
|
||||
return forNumber(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value The numeric wire value of the corresponding enum entry.
|
||||
* @return The enum associated with the given numeric wire value.
|
||||
*/
|
||||
public static NodeType forNumber(int value) {
|
||||
switch (value) {
|
||||
case 0: return DOCUMENT;
|
||||
case 1: return SECTION;
|
||||
case 2: return SUPER_SECTION;
|
||||
case 3: return HEADLINE;
|
||||
case 4: return PARAGRAPH;
|
||||
case 5: return TABLE;
|
||||
case 6: return TABLE_CELL;
|
||||
case 7: return IMAGE;
|
||||
case 8: return HEADER;
|
||||
case 9: return FOOTER;
|
||||
case 10: return TABLE_OF_CONTENTS;
|
||||
case 11: return TABLE_OF_CONTENTS_ITEM;
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static com.google.protobuf.Internal.EnumLiteMap<NodeType>
|
||||
internalGetValueMap() {
|
||||
return internalValueMap;
|
||||
}
|
||||
private static final com.google.protobuf.Internal.EnumLiteMap<
|
||||
NodeType> internalValueMap =
|
||||
new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
|
||||
public NodeType findValueByNumber(int number) {
|
||||
return NodeType.forNumber(number);
|
||||
}
|
||||
};
|
||||
|
||||
public final com.google.protobuf.Descriptors.EnumValueDescriptor
|
||||
getValueDescriptor() {
|
||||
if (this == UNRECOGNIZED) {
|
||||
throw new java.lang.IllegalStateException(
|
||||
"Can't get the descriptor of an unrecognized enum value.");
|
||||
}
|
||||
return getDescriptor().getValues().get(ordinal());
|
||||
}
|
||||
public final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptorForType() {
|
||||
return getDescriptor();
|
||||
}
|
||||
public static final com.google.protobuf.Descriptors.EnumDescriptor
|
||||
getDescriptor() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.NodeTypeProto.getDescriptor().getEnumTypes().get(0);
|
||||
}
|
||||
|
||||
private static final NodeType[] VALUES = values();
|
||||
|
||||
public static NodeType valueOf(
|
||||
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
|
||||
if (desc.getType() != getDescriptor()) {
|
||||
throw new java.lang.IllegalArgumentException(
|
||||
"EnumValueDescriptor is not for this type.");
|
||||
}
|
||||
if (desc.getIndex() == -1) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
return VALUES[desc.getIndex()];
|
||||
}
|
||||
|
||||
private final int value;
|
||||
|
||||
private NodeType(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(enum_scope:NodeType)
|
||||
}
|
||||
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor
|
||||
getDescriptor() {
|
||||
return descriptor;
|
||||
}
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor
|
||||
descriptor;
|
||||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\016NodeType.proto*\306\001\n\010NodeType\022\014\n\010DOCUMEN" +
|
||||
"T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H" +
|
||||
"EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT" +
|
||||
"ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO" +
|
||||
"OTER\020\t\022\025\n\021TABLE_OF_CONTENTS\020\n\022\032\n\026TABLE_O" +
|
||||
"F_CONTENTS_ITEM\020\013B?\n.com.iqser.red.servi" +
|
||||
"ce.redaction.v1.server.dataB\rNodeTypePro" +
|
||||
"tob\006proto3"
|
||||
};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor
|
||||
.internalBuildGeneratedFileFrom(descriptorData,
|
||||
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
||||
});
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
}
|
||||
@ -0,0 +1,606 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// NO CHECKED-IN PROTOBUF GENCODE
|
||||
// source: Range.proto
|
||||
// Protobuf Java Version: 4.28.3
|
||||
|
||||
package com.iqser.red.service.redaction.v1.server.data;
|
||||
|
||||
public final class RangeProto {
|
||||
/** Private constructor: the class is not instantiated from outside. */
private RangeProto() {
}
|
||||
// Asks the protobuf runtime to validate gencode version 4.28.3 when RangeProto is loaded.
static {
  com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
      com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
      /* major= */ 4, /* minor= */ 28, /* patch= */ 3, /* suffix= */ "",
      RangeProto.class.getName());
}
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistryLite registry) {
|
||||
}
|
||||
|
||||
public static void registerAllExtensions(
|
||||
com.google.protobuf.ExtensionRegistry registry) {
|
||||
registerAllExtensions(
|
||||
(com.google.protobuf.ExtensionRegistryLite) registry);
|
||||
}
|
||||
public interface RangeOrBuilder extends
|
||||
// @@protoc_insertion_point(interface_extends:Range)
|
||||
com.google.protobuf.MessageOrBuilder {
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* A start index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 start = 1;</code>
|
||||
* @return The start.
|
||||
*/
|
||||
int getStart();
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* An end index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 end = 2;</code>
|
||||
* @return The end.
|
||||
*/
|
||||
int getEnd();
|
||||
}
|
||||
/**
|
||||
* Protobuf type {@code Range}
|
||||
*/
|
||||
public static final class Range extends
|
||||
com.google.protobuf.GeneratedMessage implements
|
||||
// @@protoc_insertion_point(message_implements:Range)
|
||||
RangeOrBuilder {
|
||||
private static final long serialVersionUID = 0L;

// Asks the protobuf runtime to validate gencode version 4.28.3 when Range is loaded.
static {
  com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
      com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
      /* major= */ 4, /* minor= */ 28, /* patch= */ 3, /* suffix= */ "",
      Range.class.getName());
}
|
||||
// Use Range.newBuilder() to construct.
|
||||
/** Builder-backed constructor; simply forwards the builder to the superclass. */
private Range(
    com.google.protobuf.GeneratedMessage.Builder<?> builder) {
  super(builder);
}
|
||||
/** No-arg constructor; both fields keep their declared initial value of 0. */
private Range() {}
|
||||
|
||||
public static final com.google.protobuf.Descriptors.Descriptor
|
||||
getDescriptor() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
|
||||
internalGetFieldAccessorTable() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_fieldAccessorTable
|
||||
.ensureFieldAccessorsInitialized(
|
||||
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.class, com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.Builder.class);
|
||||
}
|
||||
|
||||
public static final int START_FIELD_NUMBER = 1;
|
||||
private int start_ = 0;
|
||||
/**
|
||||
* <pre>
|
||||
* A start index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 start = 1;</code>
|
||||
* @return The start.
|
||||
*/
|
||||
@java.lang.Override
|
||||
public int getStart() {
|
||||
return start_;
|
||||
}
|
||||
|
||||
public static final int END_FIELD_NUMBER = 2;
|
||||
private int end_ = 0;
|
||||
/**
|
||||
* <pre>
|
||||
* An end index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 end = 2;</code>
|
||||
* @return The end.
|
||||
*/
|
||||
@java.lang.Override
|
||||
public int getEnd() {
|
||||
return end_;
|
||||
}
|
||||
|
||||
private byte memoizedIsInitialized = -1;
|
||||
@java.lang.Override
|
||||
public final boolean isInitialized() {
|
||||
byte isInitialized = memoizedIsInitialized;
|
||||
if (isInitialized == 1) return true;
|
||||
if (isInitialized == 0) return false;
|
||||
|
||||
memoizedIsInitialized = 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public void writeTo(com.google.protobuf.CodedOutputStream output)
|
||||
throws java.io.IOException {
|
||||
if (start_ != 0) {
|
||||
output.writeInt32(1, start_);
|
||||
}
|
||||
if (end_ != 0) {
|
||||
output.writeInt32(2, end_);
|
||||
}
|
||||
getUnknownFields().writeTo(output);
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public int getSerializedSize() {
|
||||
int size = memoizedSize;
|
||||
if (size != -1) return size;
|
||||
|
||||
size = 0;
|
||||
if (start_ != 0) {
|
||||
size += com.google.protobuf.CodedOutputStream
|
||||
.computeInt32Size(1, start_);
|
||||
}
|
||||
if (end_ != 0) {
|
||||
size += com.google.protobuf.CodedOutputStream
|
||||
.computeInt32Size(2, end_);
|
||||
}
|
||||
size += getUnknownFields().getSerializedSize();
|
||||
memoizedSize = size;
|
||||
return size;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public boolean equals(final java.lang.Object obj) {
|
||||
if (obj == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof com.iqser.red.service.redaction.v1.server.data.RangeProto.Range)) {
|
||||
return super.equals(obj);
|
||||
}
|
||||
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range other = (com.iqser.red.service.redaction.v1.server.data.RangeProto.Range) obj;
|
||||
|
||||
if (getStart()
|
||||
!= other.getStart()) return false;
|
||||
if (getEnd()
|
||||
!= other.getEnd()) return false;
|
||||
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public int hashCode() {
|
||||
if (memoizedHashCode != 0) {
|
||||
return memoizedHashCode;
|
||||
}
|
||||
int hash = 41;
|
||||
hash = (19 * hash) + getDescriptor().hashCode();
|
||||
hash = (37 * hash) + START_FIELD_NUMBER;
|
||||
hash = (53 * hash) + getStart();
|
||||
hash = (37 * hash) + END_FIELD_NUMBER;
|
||||
hash = (53 * hash) + getEnd();
|
||||
hash = (29 * hash) + getUnknownFields().hashCode();
|
||||
memoizedHashCode = hash;
|
||||
return hash;
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
java.nio.ByteBuffer data)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
java.nio.ByteBuffer data,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
com.google.protobuf.ByteString data)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
com.google.protobuf.ByteString data,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(byte[] data)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
byte[] data,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
return PARSER.parseFrom(data, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(java.io.InputStream input)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
java.io.InputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input, extensionRegistry);
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseDelimitedFrom(java.io.InputStream input)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseDelimitedWithIOException(PARSER, input);
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseDelimitedFrom(
|
||||
java.io.InputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
com.google.protobuf.CodedInputStream input)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input);
|
||||
}
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
|
||||
com.google.protobuf.CodedInputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
return com.google.protobuf.GeneratedMessage
|
||||
.parseWithIOException(PARSER, input, extensionRegistry);
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public Builder newBuilderForType() { return newBuilder(); }
|
||||
public static Builder newBuilder() {
|
||||
return DEFAULT_INSTANCE.toBuilder();
|
||||
}
|
||||
public static Builder newBuilder(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range prototype) {
|
||||
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
|
||||
}
|
||||
@java.lang.Override
|
||||
public Builder toBuilder() {
|
||||
return this == DEFAULT_INSTANCE
|
||||
? new Builder() : new Builder().mergeFrom(this);
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
protected Builder newBuilderForType(
|
||||
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
|
||||
Builder builder = new Builder(parent);
|
||||
return builder;
|
||||
}
|
||||
/**
|
||||
* Protobuf type {@code Range}
|
||||
*/
|
||||
public static final class Builder extends
|
||||
com.google.protobuf.GeneratedMessage.Builder<Builder> implements
|
||||
// @@protoc_insertion_point(builder_implements:Range)
|
||||
com.iqser.red.service.redaction.v1.server.data.RangeProto.RangeOrBuilder {
|
||||
public static final com.google.protobuf.Descriptors.Descriptor
|
||||
getDescriptor() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
|
||||
internalGetFieldAccessorTable() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_fieldAccessorTable
|
||||
.ensureFieldAccessorsInitialized(
|
||||
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.class, com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.Builder.class);
|
||||
}
|
||||
|
||||
// Construct using com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.newBuilder()
|
||||
private Builder() {
|
||||
|
||||
}
|
||||
|
||||
private Builder(
|
||||
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
|
||||
super(parent);
|
||||
|
||||
}
|
||||
@java.lang.Override
|
||||
public Builder clear() {
|
||||
super.clear();
|
||||
bitField0_ = 0;
|
||||
start_ = 0;
|
||||
end_ = 0;
|
||||
return this;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.google.protobuf.Descriptors.Descriptor
|
||||
getDescriptorForType() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstanceForType() {
|
||||
return com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.getDefaultInstance();
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range build() {
|
||||
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result = buildPartial();
|
||||
if (!result.isInitialized()) {
|
||||
throw newUninitializedMessageException(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range buildPartial() {
|
||||
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result = new com.iqser.red.service.redaction.v1.server.data.RangeProto.Range(this);
|
||||
if (bitField0_ != 0) { buildPartial0(result); }
|
||||
onBuilt();
|
||||
return result;
|
||||
}
|
||||
|
||||
private void buildPartial0(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result) {
|
||||
int from_bitField0_ = bitField0_;
|
||||
if (((from_bitField0_ & 0x00000001) != 0)) {
|
||||
result.start_ = start_;
|
||||
}
|
||||
if (((from_bitField0_ & 0x00000002) != 0)) {
|
||||
result.end_ = end_;
|
||||
}
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public Builder mergeFrom(com.google.protobuf.Message other) {
|
||||
if (other instanceof com.iqser.red.service.redaction.v1.server.data.RangeProto.Range) {
|
||||
return mergeFrom((com.iqser.red.service.redaction.v1.server.data.RangeProto.Range)other);
|
||||
} else {
|
||||
super.mergeFrom(other);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public Builder mergeFrom(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range other) {
|
||||
if (other == com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.getDefaultInstance()) return this;
|
||||
if (other.getStart() != 0) {
|
||||
setStart(other.getStart());
|
||||
}
|
||||
if (other.getEnd() != 0) {
|
||||
setEnd(other.getEnd());
|
||||
}
|
||||
this.mergeUnknownFields(other.getUnknownFields());
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public final boolean isInitialized() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public Builder mergeFrom(
|
||||
com.google.protobuf.CodedInputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws java.io.IOException {
|
||||
if (extensionRegistry == null) {
|
||||
throw new java.lang.NullPointerException();
|
||||
}
|
||||
try {
|
||||
boolean done = false;
|
||||
while (!done) {
|
||||
int tag = input.readTag();
|
||||
switch (tag) {
|
||||
case 0:
|
||||
done = true;
|
||||
break;
|
||||
case 8: {
|
||||
start_ = input.readInt32();
|
||||
bitField0_ |= 0x00000001;
|
||||
break;
|
||||
} // case 8
|
||||
case 16: {
|
||||
end_ = input.readInt32();
|
||||
bitField0_ |= 0x00000002;
|
||||
break;
|
||||
} // case 16
|
||||
default: {
|
||||
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
|
||||
done = true; // was an endgroup tag
|
||||
}
|
||||
break;
|
||||
} // default:
|
||||
} // switch (tag)
|
||||
} // while (!done)
|
||||
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
|
||||
throw e.unwrapIOException();
|
||||
} finally {
|
||||
onChanged();
|
||||
} // finally
|
||||
return this;
|
||||
}
|
||||
private int bitField0_;
|
||||
|
||||
private int start_ ;
|
||||
/**
|
||||
* <pre>
|
||||
* A start index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 start = 1;</code>
|
||||
* @return The start.
|
||||
*/
|
||||
@java.lang.Override
|
||||
public int getStart() {
|
||||
return start_;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* A start index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 start = 1;</code>
|
||||
* @param value The start to set.
|
||||
* @return This builder for chaining.
|
||||
*/
|
||||
public Builder setStart(int value) {
|
||||
|
||||
start_ = value;
|
||||
bitField0_ |= 0x00000001;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* A start index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 start = 1;</code>
|
||||
* @return This builder for chaining.
|
||||
*/
|
||||
public Builder clearStart() {
|
||||
bitField0_ = (bitField0_ & ~0x00000001);
|
||||
start_ = 0;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
|
||||
private int end_ ;
|
||||
/**
|
||||
* <pre>
|
||||
* An end index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 end = 2;</code>
|
||||
* @return The end.
|
||||
*/
|
||||
@java.lang.Override
|
||||
public int getEnd() {
|
||||
return end_;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* An end index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 end = 2;</code>
|
||||
* @param value The end to set.
|
||||
* @return This builder for chaining.
|
||||
*/
|
||||
public Builder setEnd(int value) {
|
||||
|
||||
end_ = value;
|
||||
bitField0_ |= 0x00000002;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <pre>
|
||||
* An end index.
|
||||
* </pre>
|
||||
*
|
||||
* <code>int32 end = 2;</code>
|
||||
* @return This builder for chaining.
|
||||
*/
|
||||
public Builder clearEnd() {
|
||||
bitField0_ = (bitField0_ & ~0x00000002);
|
||||
end_ = 0;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(builder_scope:Range)
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(class_scope:Range)
|
||||
private static final com.iqser.red.service.redaction.v1.server.data.RangeProto.Range DEFAULT_INSTANCE;
|
||||
static {
|
||||
DEFAULT_INSTANCE = new com.iqser.red.service.redaction.v1.server.data.RangeProto.Range();
|
||||
}
|
||||
|
||||
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstance() {
|
||||
return DEFAULT_INSTANCE;
|
||||
}
|
||||
|
||||
private static final com.google.protobuf.Parser<Range>
|
||||
PARSER = new com.google.protobuf.AbstractParser<Range>() {
|
||||
@java.lang.Override
|
||||
public Range parsePartialFrom(
|
||||
com.google.protobuf.CodedInputStream input,
|
||||
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
|
||||
throws com.google.protobuf.InvalidProtocolBufferException {
|
||||
Builder builder = newBuilder();
|
||||
try {
|
||||
builder.mergeFrom(input, extensionRegistry);
|
||||
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
|
||||
throw e.setUnfinishedMessage(builder.buildPartial());
|
||||
} catch (com.google.protobuf.UninitializedMessageException e) {
|
||||
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
|
||||
} catch (java.io.IOException e) {
|
||||
throw new com.google.protobuf.InvalidProtocolBufferException(e)
|
||||
.setUnfinishedMessage(builder.buildPartial());
|
||||
}
|
||||
return builder.buildPartial();
|
||||
}
|
||||
};
|
||||
|
||||
public static com.google.protobuf.Parser<Range> parser() {
|
||||
return PARSER;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.google.protobuf.Parser<Range> getParserForType() {
|
||||
return PARSER;
|
||||
}
|
||||
|
||||
@java.lang.Override
|
||||
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstanceForType() {
|
||||
return DEFAULT_INSTANCE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static final com.google.protobuf.Descriptors.Descriptor
|
||||
internal_static_Range_descriptor;
|
||||
private static final
|
||||
com.google.protobuf.GeneratedMessage.FieldAccessorTable
|
||||
internal_static_Range_fieldAccessorTable;
|
||||
|
||||
public static com.google.protobuf.Descriptors.FileDescriptor
|
||||
getDescriptor() {
|
||||
return descriptor;
|
||||
}
|
||||
private static com.google.protobuf.Descriptors.FileDescriptor
|
||||
descriptor;
|
||||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\013Range.proto\"#\n\005Range\022\r\n\005start\030\001 \001(\005\022\013\n" +
|
||||
"\003end\030\002 \001(\005B<\n.com.iqser.red.service.reda" +
|
||||
"ction.v1.server.dataB\nRangeProtob\006proto3"
|
||||
};
|
||||
descriptor = com.google.protobuf.Descriptors.FileDescriptor
|
||||
.internalBuildGeneratedFileFrom(descriptorData,
|
||||
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
||||
});
|
||||
internal_static_Range_descriptor =
|
||||
getDescriptor().getMessageTypes().get(0);
|
||||
internal_static_Range_fieldAccessorTable = new
|
||||
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
|
||||
internal_static_Range_descriptor,
|
||||
new java.lang.String[] { "Start", "End", });
|
||||
descriptor.resolveAllFeaturesImmutable();
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(outer_class_scope)
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data.old;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Deprecated
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentPage implements Serializable {
|
||||
|
||||
int number;
|
||||
int height;
|
||||
int width;
|
||||
int rotation;
|
||||
|
||||
}
|
||||
@ -0,0 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data.old;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Deprecated
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentPositionData implements Serializable {
|
||||
|
||||
Long id;
|
||||
int[] stringIdxToPositionIdx;
|
||||
float[][] positions;
|
||||
|
||||
}
|
||||
@ -0,0 +1,158 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data.old;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Deprecated
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentStructure implements Serializable {
|
||||
|
||||
EntryData root;
|
||||
|
||||
public static class TableProperties implements Serializable {
|
||||
|
||||
public static final String NUMBER_OF_ROWS = "numberOfRows";
|
||||
public static final String NUMBER_OF_COLS = "numberOfCols";
|
||||
|
||||
}
|
||||
|
||||
public static class ImageProperties implements Serializable {
|
||||
|
||||
public static final String TRANSPARENT = "transparent";
|
||||
public static final String IMAGE_TYPE = "imageType";
|
||||
public static final String POSITION = "position";
|
||||
public static final String ID = "id";
|
||||
|
||||
public static final String REPRESENTATION_HASH = "representationHash";
|
||||
|
||||
}
|
||||
|
||||
public static class TableCellProperties implements Serializable {
|
||||
|
||||
public static final String B_BOX = "bBox";
|
||||
public static final String ROW = "row";
|
||||
public static final String COL = "col";
|
||||
public static final String HEADER = "header";
|
||||
|
||||
}
|
||||
|
||||
public static class DuplicateParagraphProperties implements Serializable {
|
||||
|
||||
public static final String UNSORTED_TEXTBLOCK_ID = "utbid";
|
||||
|
||||
}
|
||||
|
||||
public static final String RECTANGLE_DELIMITER = ";";
|
||||
|
||||
|
||||
public static Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
|
||||
.map(Float::parseFloat)
|
||||
.toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
}
|
||||
|
||||
|
||||
public static double[] parseRepresentationVector(String representationHash) {
|
||||
|
||||
String[] stringArray = representationHash.split("[,\\s]+");
|
||||
double[] doubleArray = new double[stringArray.length];
|
||||
for (int i = 0; i < stringArray.length; i++) {
|
||||
doubleArray[i] = Double.parseDouble(stringArray[i]);
|
||||
}
|
||||
|
||||
return doubleArray;
|
||||
}
|
||||
|
||||
|
||||
public EntryData get(List<Integer> tocId) {
|
||||
|
||||
if (tocId.isEmpty()) {
|
||||
return root;
|
||||
}
|
||||
EntryData entry = root.children.get(tocId.get(0));
|
||||
for (int id : tocId.subList(1, tocId.size())) {
|
||||
entry = entry.children.get(id);
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
|
||||
public Stream<EntryData> streamAllEntries() {
|
||||
|
||||
return Stream.concat(Stream.of(root), root.children.stream())
|
||||
.flatMap(DocumentStructure::flatten);
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
return String.join("\n",
|
||||
streamAllEntries().map(EntryData::toString)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Stream<EntryData> flatten(EntryData entry) {
|
||||
|
||||
return Stream.concat(Stream.of(entry),
|
||||
entry.children.stream()
|
||||
.flatMap(DocumentStructure::flatten));
|
||||
}
|
||||
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public static class EntryData implements Serializable {
|
||||
|
||||
NodeType type;
|
||||
int[] treeId;
|
||||
Long[] atomicBlockIds;
|
||||
Long[] pageNumbers;
|
||||
Map<String, String> properties;
|
||||
List<EntryData> children;
|
||||
Set<LayoutEngine> engines;
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
for (int i : treeId) {
|
||||
sb.append(i);
|
||||
sb.append(",");
|
||||
}
|
||||
sb.delete(sb.length() - 1, sb.length());
|
||||
sb.append("]: ");
|
||||
|
||||
sb.append(type);
|
||||
sb.append(" atbs = ");
|
||||
sb.append(atomicBlockIds.length);
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,28 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data.old;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Deprecated
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentTextData implements Serializable {
|
||||
|
||||
Long id;
|
||||
Long page;
|
||||
String searchText;
|
||||
int numberOnPage;
|
||||
int start;
|
||||
int end;
|
||||
int[] lineBreaks;
|
||||
|
||||
}
|
||||
@ -0,0 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data.old;
|
||||
|
||||
/**
 * Legacy enumeration of layout engines.
 *
 * <p>NOTE(review): presumably identifies which engine produced a structure entry - confirm
 * against the producers of this data.
 */
@Deprecated
public enum LayoutEngine {

    ALGORITHM,

    AI,

    OUTLINE;

}
|
||||
@ -0,0 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.data.old;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
 * Legacy enumeration of the node kinds that can appear in a document structure tree.
 */
@Deprecated
public enum NodeType implements Serializable {

    DOCUMENT,
    SECTION,
    SUPER_SECTION,
    HEADLINE,
    PARAGRAPH,
    TABLE,
    TABLE_CELL,
    IMAGE,
    HEADER,
    FOOTER;


    /**
     * Renders the constant name with its first character unchanged and the rest lower-cased
     * using {@link Locale#ROOT}, e.g. {@code SUPER_SECTION} becomes {@code "Super_section"}.
     * Underscores are kept as they are.
     */
    @Override
    public String toString() {

        String constantName = name();
        char initial = constantName.charAt(0);
        String remainder = constantName.substring(1).toLowerCase(Locale.ROOT);
        return initial + remainder;
    }

}
|
||||
@ -0,0 +1,199 @@
|
||||
package com.iqser.red.service.redaction.v1.server.mapper;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.AllDocumentPages;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.DocumentPage;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData.Position;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureWrapper;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.DocumentTextData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto;
|
||||
import com.iqser.red.service.redaction.v1.server.data.NodeTypeProto;
|
||||
import com.iqser.red.service.redaction.v1.server.data.RangeProto;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class DocumentDataMapper {
|
||||
|
||||
public DocumentData toDocumentData(Document document) {
|
||||
|
||||
List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
|
||||
.stream())
|
||||
.distinct()
|
||||
.map(DocumentDataMapper::toAtomicTextBlockData)
|
||||
.toList();
|
||||
|
||||
AllDocumentTextData allDocumentTextData = AllDocumentTextData.newBuilder().addAllDocumentTextData(documentTextData).build();
|
||||
|
||||
List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
|
||||
.stream())
|
||||
.distinct()
|
||||
.map(DocumentDataMapper::toAtomicPositionBlockData)
|
||||
.toList();
|
||||
|
||||
AllDocumentPositionData allDocumentPositionData = AllDocumentPositionData.newBuilder().addAllDocumentPositionData(atomicPositionBlockData).build();
|
||||
|
||||
List<DocumentPage> documentPageData = document.getPages()
|
||||
.stream()
|
||||
.sorted(Comparator.comparingInt(Page::getNumber))
|
||||
.map(DocumentDataMapper::toPageData)
|
||||
.toList();
|
||||
|
||||
AllDocumentPages allDocumentPages = AllDocumentPages.newBuilder().addAllDocumentPages(documentPageData).build();
|
||||
DocumentStructureWrapper tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
||||
return DocumentData.builder()
|
||||
.documentTextData(allDocumentTextData)
|
||||
.documentPositionData(allDocumentPositionData)
|
||||
.documentPages(allDocumentPages)
|
||||
.documentStructureWrapper(tableOfContentsData)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private DocumentStructureWrapper toDocumentTreeData(DocumentTree documentTree) {
|
||||
|
||||
return new DocumentStructureWrapper(DocumentStructure.newBuilder().setRoot(toEntryData(documentTree.getRoot())).build());
|
||||
}
|
||||
|
||||
|
||||
private EntryData toEntryData(DocumentTree.Entry entry) {
|
||||
|
||||
List<Long> atomicTextBlocks;
|
||||
|
||||
if (entry.getNode().isLeaf()) {
|
||||
atomicTextBlocks = toAtomicTextBlockIds(entry.getNode().getLeafTextBlock());
|
||||
} else {
|
||||
atomicTextBlocks = new ArrayList<>();
|
||||
}
|
||||
|
||||
Map<String, String> properties = switch (entry.getType()) {
|
||||
case TABLE -> PropertiesMapper.buildTableProperties((Table) entry.getNode());
|
||||
case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCell) entry.getNode());
|
||||
case IMAGE -> PropertiesMapper.buildImageProperties((Image) entry.getNode());
|
||||
case PARAGRAPH ->
|
||||
entry.getNode() instanceof DuplicatedParagraph duplicatedParagraph ? PropertiesMapper.buildDuplicateParagraphProperties(duplicatedParagraph) : new HashMap<>();
|
||||
default -> new HashMap<>();
|
||||
};
|
||||
|
||||
var documentBuilder = EntryData.newBuilder()
|
||||
.addAllTreeId(entry.getTreeId())
|
||||
.addAllChildren(entry.getChildren()
|
||||
.stream()
|
||||
.map(DocumentDataMapper::toEntryData)
|
||||
.toList())
|
||||
.setType(resolveType(entry.getType()))
|
||||
.addAllAtomicBlockIds(atomicTextBlocks)
|
||||
.addAllPageNumbers(entry.getNode().getPages()
|
||||
.stream()
|
||||
.map(Page::getNumber)
|
||||
.map(Integer::longValue)
|
||||
.toList())
|
||||
.putAllProperties(properties);
|
||||
|
||||
if (entry.getNode() != null) {
|
||||
documentBuilder.addAllEngines(entry.getNode().getEngines()
|
||||
.stream()
|
||||
.map(engine -> LayoutEngineProto.LayoutEngine.valueOf(engine.name()))
|
||||
.toList());
|
||||
} else {
|
||||
documentBuilder.addAllEngines(new HashSet<>(Set.of(LayoutEngineProto.LayoutEngine.ALGORITHM)));
|
||||
}
|
||||
return documentBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
private static NodeTypeProto.NodeType resolveType(NodeType type) {
|
||||
|
||||
return NodeTypeProto.NodeType.valueOf(type.name());
|
||||
}
|
||||
|
||||
|
||||
private List<Long> toAtomicTextBlockIds(TextBlock textBlock) {
|
||||
|
||||
return textBlock.getAtomicTextBlocks()
|
||||
.stream()
|
||||
.map(AtomicTextBlock::getId)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private DocumentPage toPageData(Page p) {
|
||||
|
||||
return DocumentPage.newBuilder().setRotation(p.getRotation()).setHeight(p.getHeight()).setWidth(p.getWidth()).setNumber(p.getNumber()).build();
|
||||
}
|
||||
|
||||
|
||||
private DocumentTextData toAtomicTextBlockData(AtomicTextBlock atomicTextBlock) {
|
||||
|
||||
return DocumentTextData.newBuilder()
|
||||
.setId(atomicTextBlock.getId())
|
||||
.setPage(atomicTextBlock.getPage().getNumber().longValue())
|
||||
.setSearchText(atomicTextBlock.getSearchText())
|
||||
.setNumberOnPage(atomicTextBlock.getNumberOnPage())
|
||||
.setStart(atomicTextBlock.getTextRange().start())
|
||||
.setEnd(atomicTextBlock.getTextRange().end())
|
||||
.addAllLineBreaks(atomicTextBlock.getLineBreaks())
|
||||
.addAllItalicTextRanges(atomicTextBlock.getItalicTextRanges()
|
||||
.stream()
|
||||
.map(r -> RangeProto.Range.newBuilder().setStart(r.start()).setEnd(r.end()).build())
|
||||
.toList())
|
||||
.addAllBoldTextRanges(atomicTextBlock.getBoldTextRanges()
|
||||
.stream()
|
||||
.map(r -> RangeProto.Range.newBuilder().setStart(r.start()).setEnd(r.end()).build())
|
||||
.toList())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private DocumentPositionData toAtomicPositionBlockData(AtomicTextBlock atomicTextBlock) {
|
||||
|
||||
return DocumentPositionData.newBuilder()
|
||||
.setId(atomicTextBlock.getId())
|
||||
.addAllPositions(toPositions(atomicTextBlock.getPositions()))
|
||||
.addAllStringIdxToPositionIdx(atomicTextBlock.getStringIdxToPositionIdx())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private static List<Position> toPositions(List<Rectangle2D> rects) {
|
||||
|
||||
List<Position> positions = new ArrayList<>();
|
||||
for (Rectangle2D rect : rects) {
|
||||
positions.add(toPosition(rect));
|
||||
}
|
||||
return positions;
|
||||
}
|
||||
|
||||
|
||||
private static Position toPosition(Rectangle2D rect) {
|
||||
|
||||
return Position.newBuilder().addValue((float) rect.getMinX()).addValue((float) rect.getMinY()).addValue((float) rect.getWidth()).addValue((float) rect.getHeight()).build();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,15 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service.document;
|
||||
package com.iqser.red.service.redaction.v1.server.mapper;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.DocumentPage;
|
||||
import static com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
|
||||
import static com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
|
||||
import static com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentData;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
|
||||
@ -17,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
@ -24,13 +28,11 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@ -43,79 +45,89 @@ public class DocumentGraphMapper {
|
||||
DocumentTree documentTree = new DocumentTree(document);
|
||||
Context context = new Context(documentData, documentTree);
|
||||
|
||||
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages())
|
||||
.map(DocumentGraphMapper::buildPage)
|
||||
.toList());
|
||||
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildrenList(), context));
|
||||
|
||||
document.setDocumentTree(context.documentTree);
|
||||
document.setPages(new HashSet<>(context.pageData));
|
||||
document.setNumberOfPages(documentData.getDocumentPages().length);
|
||||
document.setNumberOfPages(documentData.getDocumentPages().getDocumentPagesCount());
|
||||
|
||||
document.setTextBlock(document.getTextBlock());
|
||||
return document;
|
||||
}
|
||||
|
||||
|
||||
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
|
||||
private List<DocumentTree.Entry> buildEntries(List<EntryData> entries, Context context) {
|
||||
|
||||
List<DocumentTree.Entry> newEntries = new ArrayList<>(entries.size());
|
||||
for (DocumentStructure.EntryData entryData : entries) {
|
||||
for (EntryData entryData : entries) {
|
||||
|
||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
|
||||
.map(pageNumber -> getPage(pageNumber, context))
|
||||
List<Page> pages = entryData.getPageNumbersList()
|
||||
.stream()
|
||||
.map(context::getPage)
|
||||
.toList();
|
||||
|
||||
SemanticNode node = switch (entryData.getType()) {
|
||||
case SECTION -> buildSection(context);
|
||||
case SUPER_SECTION -> buildSuperSection(context);
|
||||
case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
|
||||
case PARAGRAPH -> buildParagraph(context, entryData.getPropertiesMap());
|
||||
case HEADLINE -> buildHeadline(context);
|
||||
case HEADER -> buildHeader(context);
|
||||
case FOOTER -> buildFooter(context);
|
||||
case TABLE -> buildTable(context, entryData.getProperties());
|
||||
case TABLE_CELL -> buildTableCell(context, entryData.getProperties());
|
||||
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbers());
|
||||
case TABLE -> buildTable(context, entryData.getPropertiesMap());
|
||||
case TABLE_CELL -> buildTableCell(context, entryData.getPropertiesMap());
|
||||
case IMAGE -> buildImage(context, entryData.getPropertiesMap(), entryData.getPageNumbersList());
|
||||
case TABLE_OF_CONTENTS -> buildTableOfContents(context);
|
||||
case TABLE_OF_CONTENTS_ITEM -> buildTableOfContentsItem(context);
|
||||
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
|
||||
};
|
||||
|
||||
if (entryData.getAtomicBlockIds().length > 0) {
|
||||
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
|
||||
if (entryData.getAtomicBlockIdsCount() > 0) {
|
||||
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIdsList(), context, node);
|
||||
node.setLeafTextBlock(textBlock);
|
||||
switch (entryData.getType()) {
|
||||
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
||||
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
||||
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
|
||||
default -> textBlock.getAtomicTextBlocks()
|
||||
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
|
||||
}
|
||||
}
|
||||
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
|
||||
.toList();
|
||||
if (entryData.getEngines() != null) {
|
||||
entryData.getEngines()
|
||||
.forEach(node::addEngine);
|
||||
} else {
|
||||
entryData.setEngines(Collections.emptySet());
|
||||
}
|
||||
List<Integer> treeId = entryData.getTreeIdList();
|
||||
node.setTreeId(treeId);
|
||||
|
||||
switch (entryData.getType()) {
|
||||
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
||||
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
||||
default -> pages.forEach(page -> page.getMainBody().add(node));
|
||||
}
|
||||
entryData.getEnginesList()
|
||||
.stream()
|
||||
.map(engine -> LayoutEngine.valueOf(engine.name()))
|
||||
.forEach(node::addEngine);
|
||||
|
||||
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
|
||||
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildrenList(), context)).node(node).build());
|
||||
}
|
||||
return newEntries;
|
||||
}
|
||||
|
||||
|
||||
private static TableOfContents buildTableOfContents(Context context) {
|
||||
|
||||
return TableOfContents.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
private static TableOfContentsItem buildTableOfContentsItem(Context context) {
|
||||
|
||||
return TableOfContentsItem.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
private Headline buildHeadline(Context context) {
|
||||
|
||||
return Headline.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
|
||||
private Image buildImage(Context context, Map<String, String> properties, List<Long> pageNumbers) {
|
||||
|
||||
assert pageNumbers.length == 1;
|
||||
Page page = getPage(pageNumbers[0], context);
|
||||
assert pageNumbers.size() == 1;
|
||||
Page page = context.getPage(pageNumbers.get(0));
|
||||
var builder = Image.builder();
|
||||
PropertiesMapper.parseImageProperties(properties, builder);
|
||||
return builder.documentTree(context.documentTree).page(page).build();
|
||||
@ -161,13 +173,14 @@ public class DocumentGraphMapper {
|
||||
return SuperSection.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
private Paragraph buildParagraph(Context context, Map<String, String> properties) {
|
||||
|
||||
if (PropertiesMapper.isDuplicateParagraph(properties)) {
|
||||
|
||||
DuplicatedParagraph duplicatedParagraph = DuplicatedParagraph.builder().documentTree(context.documentTree).build();
|
||||
|
||||
Long[] unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
|
||||
var unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
|
||||
duplicatedParagraph.setUnsortedLeafTextBlock(toTextBlock(unsortedTextblockIds, context, duplicatedParagraph));
|
||||
return duplicatedParagraph;
|
||||
}
|
||||
@ -176,9 +189,9 @@ public class DocumentGraphMapper {
|
||||
}
|
||||
|
||||
|
||||
private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) {
|
||||
private TextBlock toTextBlock(List<Long> atomicTextBlockIds, Context context, SemanticNode parent) {
|
||||
|
||||
return Arrays.stream(atomicTextBlockIds)
|
||||
return atomicTextBlockIds.stream()
|
||||
.map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId))
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
@ -186,24 +199,16 @@ public class DocumentGraphMapper {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
|
||||
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)),
|
||||
context.documentPositionData.getDocumentPositionData(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
context.getPage(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)).getPage()));
|
||||
}
|
||||
|
||||
|
||||
private Page buildPage(DocumentPage p) {
|
||||
|
||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
|
||||
}
|
||||
|
||||
|
||||
private Page getPage(Long pageIndex, Context context) {
|
||||
|
||||
Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1);
|
||||
assert page.getNumber() == Math.toIntExact(pageIndex);
|
||||
return page;
|
||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).textBlocksOnPage(new LinkedList<>()).build();
|
||||
}
|
||||
|
||||
|
||||
@ -211,21 +216,33 @@ public class DocumentGraphMapper {
|
||||
|
||||
private final DocumentTree documentTree;
|
||||
private final List<Page> pageData;
|
||||
private final List<DocumentTextData> documentTextData;
|
||||
private final List<DocumentPositionData> documentPositionData;
|
||||
private final AllDocumentTextData documentTextData;
|
||||
private final AllDocumentPositionData documentPositionData;
|
||||
|
||||
|
||||
Context(DocumentData documentData, DocumentTree documentTree) {
|
||||
|
||||
this.documentTree = documentTree;
|
||||
this.pageData = new ArrayList<>();
|
||||
this.documentTextData = Arrays.stream(documentData.getDocumentTextData())
|
||||
.toList();
|
||||
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData())
|
||||
this.pageData = documentData.getDocumentPages().getDocumentPagesList()
|
||||
.stream()
|
||||
.map(DocumentGraphMapper::buildPage)
|
||||
.sorted(Comparator.comparingInt(Page::getNumber))
|
||||
.toList();
|
||||
this.documentTextData = documentData.getDocumentTextData();
|
||||
this.documentPositionData = documentData.getDocumentPositionData();
|
||||
|
||||
}
|
||||
|
||||
|
||||
public Page getPage(Long pageIndex) {
|
||||
|
||||
Page page = pageData.get(Math.toIntExact(pageIndex) - 1);
|
||||
assert page.getNumber() == Math.toIntExact(pageIndex);
|
||||
return page;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,152 @@
|
||||
package com.iqser.red.service.redaction.v1.server.mapper;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureWrapper;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class PropertiesMapper {
|
||||
|
||||
public static Map<String, String> buildImageProperties(Image image) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE, image.getImageType().name());
|
||||
properties.put(DocumentStructureWrapper.ImageProperties.TRANSPARENT, String.valueOf(image.isTransparent()));
|
||||
properties.put(DocumentStructureWrapper.ImageProperties.POSITION, toString(image.getPosition()));
|
||||
properties.put(DocumentStructureWrapper.ImageProperties.ID, image.getId());
|
||||
properties.put(DocumentStructureWrapper.ImageProperties.REPRESENTATION_HASH, image.getRepresentationHash());
|
||||
return properties;
|
||||
}
|
||||
|
||||
|
||||
public static Map<String, String> buildTableCellProperties(TableCell tableCell) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(DocumentStructureWrapper.TableCellProperties.ROW, String.valueOf(tableCell.getRow()));
|
||||
properties.put(DocumentStructureWrapper.TableCellProperties.COL, String.valueOf(tableCell.getCol()));
|
||||
properties.put(DocumentStructureWrapper.TableCellProperties.HEADER, String.valueOf(tableCell.isHeader()));
|
||||
|
||||
if (tableCell.getPages().size() > 1 || tableCell.getBBox().keySet().size() > 1) {
|
||||
throw new IllegalArgumentException("TableCell can only occur on a single page!");
|
||||
}
|
||||
String bBoxString = toString(tableCell.getBBox()
|
||||
.get(tableCell.getPages()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.get()));
|
||||
properties.put(DocumentStructureWrapper.TableCellProperties.B_BOX, bBoxString);
|
||||
|
||||
return properties;
|
||||
}
|
||||
|
||||
|
||||
public static Map<String, String> buildTableProperties(Table table) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS, String.valueOf(table.getNumberOfRows()));
|
||||
properties.put(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS, String.valueOf(table.getNumberOfCols()));
|
||||
return properties;
|
||||
}
|
||||
|
||||
|
||||
public static void parseImageProperties(Map<String, String> properties, Image.ImageBuilder<?, ?> builder) {
|
||||
|
||||
builder.imageType(parseImageType(properties.get(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE)));
|
||||
builder.transparent(Boolean.parseBoolean(properties.get(DocumentStructureWrapper.ImageProperties.TRANSPARENT)));
|
||||
builder.position(DocumentStructureWrapper.parseRectangle2D(properties.get(DocumentStructureWrapper.ImageProperties.POSITION)));
|
||||
builder.id(properties.get(DocumentStructureWrapper.ImageProperties.ID));
|
||||
}
|
||||
|
||||
|
||||
public static void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder<?, ?> builder) {
|
||||
|
||||
builder.row(Integer.parseInt(properties.get(DocumentStructureWrapper.TableCellProperties.ROW)));
|
||||
builder.col(Integer.parseInt(properties.get(DocumentStructureWrapper.TableCellProperties.COL)));
|
||||
builder.header(Boolean.parseBoolean(properties.get(DocumentStructureWrapper.TableCellProperties.HEADER)));
|
||||
builder.bBox(DocumentStructureWrapper.parseRectangle2D(properties.get(DocumentStructureWrapper.TableCellProperties.B_BOX)));
|
||||
}
|
||||
|
||||
|
||||
public static void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {
|
||||
|
||||
builder.numberOfRows(Integer.parseInt(properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS)));
|
||||
builder.numberOfCols(Integer.parseInt(properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS)));
|
||||
}
|
||||
|
||||
|
||||
public static Map<String, String> buildDuplicateParagraphProperties(DuplicatedParagraph duplicatedParagraph) {
|
||||
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
properties.put(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID,
|
||||
Arrays.toString(toAtomicTextBlockIds(duplicatedParagraph.getUnsortedLeafTextBlock())));
|
||||
return properties;
|
||||
}
|
||||
|
||||
|
||||
public static boolean isDuplicateParagraph(Map<String, String> properties) {
|
||||
|
||||
return properties.containsKey(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
|
||||
}
|
||||
|
||||
|
||||
public static List<Long> getUnsortedTextblockIds(Map<String, String> properties) {
|
||||
|
||||
return toLongList(properties.get(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID));
|
||||
}
|
||||
|
||||
|
||||
public static List<Long> toLongList(String ids) {
|
||||
|
||||
return Arrays.stream(ids.substring(1, ids.length() - 1).trim().split(","))
|
||||
.map(Long::valueOf)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private static ImageType parseImageType(String imageType) {
|
||||
|
||||
try {
|
||||
return ImageType.valueOf(imageType.toUpperCase(Locale.ROOT));
|
||||
} catch (IllegalArgumentException e) {
|
||||
return ImageType.OTHER;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static String toString(Rectangle2D rectangle2D) {
|
||||
|
||||
return String.format(Locale.US,
|
||||
"%f%s%f%s%f%s%f",
|
||||
rectangle2D.getX(),
|
||||
DocumentStructureWrapper.RECTANGLE_DELIMITER,
|
||||
rectangle2D.getY(),
|
||||
DocumentStructureWrapper.RECTANGLE_DELIMITER,
|
||||
rectangle2D.getWidth(),
|
||||
DocumentStructureWrapper.RECTANGLE_DELIMITER,
|
||||
rectangle2D.getHeight());
|
||||
}
|
||||
|
||||
|
||||
private static Long[] toAtomicTextBlockIds(TextBlock textBlock) {
|
||||
|
||||
return textBlock.getAtomicTextBlocks()
|
||||
.stream()
|
||||
.map(AtomicTextBlock::getId)
|
||||
.toArray(Long[]::new);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,116 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
|
||||
|
||||
public abstract class AbstractNodeVisitor implements NodeVisitor {
|
||||
|
||||
@Override
|
||||
public void visit(Document document) {
|
||||
|
||||
defaultVisit(document);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(SuperSection superSection) {
|
||||
|
||||
defaultVisit(superSection);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Section section) {
|
||||
|
||||
defaultVisit(section);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Headline headline) {
|
||||
|
||||
defaultVisit(headline);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Paragraph paragraph) {
|
||||
|
||||
defaultVisit(paragraph);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Footer footer) {
|
||||
|
||||
defaultVisit(footer);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Header header) {
|
||||
|
||||
defaultVisit(header);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Image image) {
|
||||
|
||||
defaultVisit(image);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(Table table) {
|
||||
|
||||
defaultVisit(table);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(TableCell tableCell) {
|
||||
|
||||
defaultVisit(tableCell);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(TableOfContents toc) {
|
||||
|
||||
defaultVisit(toc);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(TableOfContentsItem toci) {
|
||||
|
||||
defaultVisit(toci);
|
||||
}
|
||||
|
||||
|
||||
public void visitNodeDefault(SemanticNode node) {
|
||||
// By default, it does nothing
|
||||
}
|
||||
|
||||
|
||||
protected void defaultVisit(SemanticNode semanticNode) {
|
||||
|
||||
visitNodeDefault(semanticNode);
|
||||
semanticNode.streamChildren()
|
||||
.forEach(node -> node.accept(this));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -9,6 +9,8 @@ import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
@ -17,6 +19,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.EntityCreationUtility;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.EntityEnrichmentService;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -35,7 +39,7 @@ public class DocumentTree {
|
||||
|
||||
public DocumentTree(Document document) {
|
||||
|
||||
root = Entry.builder().treeId(Collections.emptyList()).children(new LinkedList<>()).node(document).build();
|
||||
this.root = Entry.builder().treeId(Collections.emptyList()).children(new LinkedList<>()).node(document).build();
|
||||
}
|
||||
|
||||
|
||||
@ -288,14 +292,30 @@ public class DocumentTree {
|
||||
if (treeId.isEmpty()) {
|
||||
return root;
|
||||
}
|
||||
Entry entry = root.children.get(treeId.get(0));
|
||||
for (int id : treeId.subList(1, treeId.size())) {
|
||||
Entry entry = root;
|
||||
for (int id : treeId) {
|
||||
entry = entry.children.get(id);
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
|
||||
public Optional<Entry> findEntryById(List<Integer> treeId) {
|
||||
|
||||
if (treeId.isEmpty()) {
|
||||
return Optional.of(root);
|
||||
}
|
||||
Entry entry = root;
|
||||
for (int id : treeId) {
|
||||
if (id < 0 || id >= entry.children.size()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
entry = entry.children.get(id);
|
||||
}
|
||||
return Optional.of(entry);
|
||||
}
|
||||
|
||||
|
||||
public Stream<Entry> mainEntries() {
|
||||
|
||||
return root.children.stream();
|
||||
@ -342,6 +362,25 @@ public class DocumentTree {
|
||||
}
|
||||
|
||||
|
||||
public void addEntityToGraph(TextEntity entity) {
|
||||
|
||||
getRoot().getNode().addThisToEntityIfIntersects(entity);
|
||||
|
||||
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
||||
EntityEnrichmentService.enrichEntity(entity, textBlock);
|
||||
|
||||
EntityCreationUtility.addToPages(entity);
|
||||
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
||||
|
||||
if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
return;
|
||||
}
|
||||
|
||||
entity.computeRelations();
|
||||
entity.notifyEntityInserted();
|
||||
}
|
||||
|
||||
|
||||
@Builder
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
@ -0,0 +1,32 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
public class IntersectingNodeVisitor extends AbstractNodeVisitor {
|
||||
|
||||
@Getter
|
||||
private Set<SemanticNode> intersectingNodes;
|
||||
private final TextRange textRange;
|
||||
|
||||
|
||||
public IntersectingNodeVisitor(TextRange textRange) {
|
||||
|
||||
this.textRange = textRange;
|
||||
this.intersectingNodes = new HashSet<>();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visitNodeDefault(SemanticNode node) {
|
||||
|
||||
if (textRange.intersects(node.getTextRange())) {
|
||||
intersectingNodes.add(node);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,53 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
|
||||
|
||||
public interface NodeVisitor {
|
||||
|
||||
void visit(Document document);
|
||||
|
||||
|
||||
void visit(SuperSection superSection);
|
||||
|
||||
|
||||
void visit(Section section);
|
||||
|
||||
|
||||
void visit(Headline headline);
|
||||
|
||||
|
||||
void visit(Paragraph paragraph);
|
||||
|
||||
|
||||
void visit(Footer footer);
|
||||
|
||||
|
||||
void visit(Header header);
|
||||
|
||||
|
||||
void visit(Image image);
|
||||
|
||||
|
||||
void visit(Table table);
|
||||
|
||||
|
||||
void visit(TableCell tableCell);
|
||||
|
||||
|
||||
void visit(TableOfContents tableOfContents);
|
||||
|
||||
|
||||
void visit(TableOfContentsItem tableOfContentsItem);
|
||||
|
||||
}
|
||||
@ -134,6 +134,12 @@ public class TextRange implements Comparable<TextRange> {
|
||||
}
|
||||
|
||||
|
||||
public boolean containsExclusive(int index) {
|
||||
|
||||
return start <= index && index < end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this {@link TextRange} intersects with another {@link TextRange}.
|
||||
*
|
||||
@ -0,0 +1,20 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Getter
|
||||
@RequiredArgsConstructor
|
||||
public abstract class AbstractRelation implements Relation {
|
||||
|
||||
protected final TextEntity a;
|
||||
protected final TextEntity b;
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return this.getClass().getSimpleName() + "{" + "a=" + a + ", b=" + b + '}';
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
public class Containment extends Intersection {
|
||||
|
||||
public Containment(TextEntity container, TextEntity contained) {
|
||||
|
||||
super(container, contained);
|
||||
}
|
||||
|
||||
public TextEntity getContainer() {
|
||||
return a;
|
||||
}
|
||||
|
||||
public TextEntity getContained() {
|
||||
return b;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
public interface EntityEventListener {
|
||||
|
||||
/**
|
||||
* Invoked when an entity is inserted.
|
||||
*
|
||||
* @param entity The entity that was inserted.
|
||||
*/
|
||||
void onEntityInserted(IEntity entity);
|
||||
|
||||
/**
|
||||
* Invoked when an entity is updated.
|
||||
*
|
||||
* @param entity The entity that was updated.
|
||||
*/
|
||||
void onEntityUpdated(IEntity entity);
|
||||
|
||||
/**
|
||||
* Invoked when an entity is removed.
|
||||
*
|
||||
* @param entity The entity that was removed.
|
||||
*/
|
||||
void onEntityRemoved(IEntity entity);
|
||||
}
|
||||
@ -6,5 +6,6 @@ public enum EntityType {
|
||||
RECOMMENDATION,
|
||||
FALSE_POSITIVE,
|
||||
FALSE_RECOMMENDATION,
|
||||
DICTIONARY_REMOVAL
|
||||
DICTIONARY_REMOVAL,
|
||||
TEMPORARY
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
public class Equality extends Containment {
|
||||
|
||||
public Equality(TextEntity a, TextEntity b) {
|
||||
|
||||
super(a, b);
|
||||
}
|
||||
|
||||
}
|
||||
@ -6,7 +6,6 @@ import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
|
||||
|
||||
import lombok.NonNull;
|
||||
|
||||
@ -52,6 +51,17 @@ public interface IEntity {
|
||||
String type();
|
||||
|
||||
|
||||
/**
|
||||
* An entity is valid when it is active and not a false recommendation, a false positive or a dictionary removal.
|
||||
*
|
||||
* @return true if the entity is valid, false otherwise
|
||||
*/
|
||||
default boolean valid() {
|
||||
|
||||
return active();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the length of the entity's value.
|
||||
*
|
||||
@ -85,6 +95,9 @@ public interface IEntity {
|
||||
// Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
|
||||
default boolean applied() {
|
||||
|
||||
if (this.getMatchedRule().isHigherPriorityThanManual()) {
|
||||
return getMatchedRule().isApplied();
|
||||
}
|
||||
return getManualOverwrite().getApplied()
|
||||
.orElse(getMatchedRule().isApplied());
|
||||
}
|
||||
@ -108,6 +121,10 @@ public interface IEntity {
|
||||
*/
|
||||
default boolean ignored() {
|
||||
|
||||
if (this.getMatchedRule().isHigherPriorityThanManual()) {
|
||||
return getMatchedRule().isIgnored();
|
||||
}
|
||||
|
||||
return getManualOverwrite().getIgnored()
|
||||
.orElse(getMatchedRule().isIgnored());
|
||||
}
|
||||
@ -120,6 +137,9 @@ public interface IEntity {
|
||||
*/
|
||||
default boolean removed() {
|
||||
|
||||
if (this.getMatchedRule().isHigherPriorityThanManual()) {
|
||||
return getMatchedRule().isRemoved();
|
||||
}
|
||||
return getManualOverwrite().getRemoved()
|
||||
.orElse(getMatchedRule().isRemoved());
|
||||
}
|
||||
@ -132,6 +152,9 @@ public interface IEntity {
|
||||
*/
|
||||
default boolean resized() {
|
||||
|
||||
if (this.getMatchedRule().isHigherPriorityThanManual()) {
|
||||
return getMatchedRule().isRemoved();
|
||||
}
|
||||
return getManualOverwrite().getResized()
|
||||
.orElse(false);
|
||||
}
|
||||
@ -316,7 +339,9 @@ public interface IEntity {
|
||||
*/
|
||||
default void addMatchedRule(MatchedRule matchedRule) {
|
||||
|
||||
boolean wasValid = valid();
|
||||
getMatchedRuleList().add(matchedRule);
|
||||
handleStateChange(wasValid);
|
||||
}
|
||||
|
||||
|
||||
@ -330,7 +355,53 @@ public interface IEntity {
|
||||
if (getMatchedRuleList().equals(matchedRules)) {
|
||||
return;
|
||||
}
|
||||
boolean wasValid = valid();
|
||||
getMatchedRuleList().addAll(matchedRules);
|
||||
handleStateChange(wasValid);
|
||||
}
|
||||
|
||||
|
||||
void addEntityEventListener(EntityEventListener listener);
|
||||
|
||||
|
||||
void removeEntityEventListener(EntityEventListener listener);
|
||||
|
||||
|
||||
default void notifyEntityInserted() {
|
||||
|
||||
for (EntityEventListener listener : getEntityEventListeners()) {
|
||||
listener.onEntityInserted(this);
|
||||
}
|
||||
}
|
||||
|
||||
default void notifyEntityUpdated() {
|
||||
|
||||
for (EntityEventListener listener : getEntityEventListeners()) {
|
||||
listener.onEntityUpdated(this);
|
||||
}
|
||||
}
|
||||
|
||||
default void notifyEntityRemoved() {
|
||||
|
||||
for (EntityEventListener listener : getEntityEventListeners()) {
|
||||
listener.onEntityRemoved(this);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Collection<EntityEventListener> getEntityEventListeners();
|
||||
|
||||
|
||||
default void handleStateChange(boolean wasValid) {
|
||||
|
||||
if (valid() == wasValid) {
|
||||
return;
|
||||
}
|
||||
if (!removed()) {
|
||||
notifyEntityUpdated();
|
||||
} else {
|
||||
notifyEntityRemoved();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -364,15 +435,9 @@ public interface IEntity {
|
||||
*
|
||||
* @return The built reason string.
|
||||
*/
|
||||
default String buildReasonWithManualChangeDescriptions() {
|
||||
default String buildReason() {
|
||||
|
||||
if (getManualOverwrite().getDescriptions().isEmpty()) {
|
||||
return getMatchedRule().getReason();
|
||||
}
|
||||
if (getMatchedRule().getReason().isEmpty()) {
|
||||
return String.join(", ", getManualOverwrite().getDescriptions());
|
||||
}
|
||||
return getMatchedRule().getReason() + ", " + String.join(", ", getManualOverwrite().getDescriptions());
|
||||
return getMatchedRule().getReason();
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
public class Intersection extends AbstractRelation {
|
||||
|
||||
public Intersection(TextEntity a, TextEntity b) {
|
||||
|
||||
super(a, b);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,10 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||
@ -14,7 +12,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -26,18 +23,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class ManualChangeOverwrite {
|
||||
|
||||
private static final Map<Class<? extends BaseAnnotation>, String> MANUAL_CHANGE_DESCRIPTIONS = Map.of(//
|
||||
ManualRedactionEntry.class, "created by manual change", //
|
||||
ManualLegalBasisChange.class, "legal basis was manually changed", //
|
||||
ManualResizeRedaction.class, "resized by manual override", //
|
||||
ManualForceRedaction.class, "forced by manual override", //
|
||||
IdRemoval.class, "removed by manual override", //
|
||||
ManualRecategorization.class, "recategorized by manual override");
|
||||
|
||||
@Builder.Default
|
||||
List<BaseAnnotation> manualChanges = new LinkedList<>();
|
||||
boolean changed;
|
||||
List<String> descriptions;
|
||||
String type;
|
||||
String legalBasis;
|
||||
String section;
|
||||
@ -63,6 +51,7 @@ public class ManualChangeOverwrite {
|
||||
this.manualChanges = new LinkedList<>();
|
||||
}
|
||||
|
||||
|
||||
public ManualChangeOverwrite(EntityType entityType, String section) {
|
||||
|
||||
this(entityType);
|
||||
@ -95,8 +84,6 @@ public class ManualChangeOverwrite {
|
||||
|
||||
private void updateFields(List<BaseAnnotation> sortedManualChanges) {
|
||||
|
||||
descriptions = new LinkedList<>();
|
||||
|
||||
for (BaseAnnotation manualChange : sortedManualChanges) {
|
||||
// ManualRedactionEntries are created prior to rule execution in analysis service.
|
||||
|
||||
@ -151,8 +138,6 @@ public class ManualChangeOverwrite {
|
||||
legalBasis = recategorization.getLegalBasis();
|
||||
}
|
||||
}
|
||||
|
||||
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
|
||||
}
|
||||
changed = false;
|
||||
}
|
||||
@ -245,13 +230,6 @@ public class ManualChangeOverwrite {
|
||||
}
|
||||
|
||||
|
||||
public List<String> getDescriptions() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return descriptions == null ? Collections.emptyList() : descriptions;
|
||||
}
|
||||
|
||||
|
||||
public Optional<List<RectangleWithPage>> getPositions() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
@ -5,9 +5,6 @@ import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.model.drools.RuleType;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
@ -28,8 +25,9 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
public static final RuleType FINAL_TYPE = RuleType.fromString("FINAL");
|
||||
public static final RuleType ELIMINATION_RULE_TYPE = RuleType.fromString("X");
|
||||
public static final RuleType IMPORTED_TYPE = RuleType.fromString("IMP");
|
||||
public static final RuleType MANUAL_TYPE = RuleType.fromString("MAN");
|
||||
public static final RuleType DICTIONARY_TYPE = RuleType.fromString("DICT");
|
||||
private static final List<RuleType> RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE, IMPORTED_TYPE, DICTIONARY_TYPE);
|
||||
private static final List<RuleType> RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE, MANUAL_TYPE, IMPORTED_TYPE, DICTIONARY_TYPE);
|
||||
|
||||
RuleIdentifier ruleIdentifier;
|
||||
@Builder.Default
|
||||
@ -57,6 +55,13 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
}
|
||||
|
||||
|
||||
public boolean isHigherPriorityThanManual() {
|
||||
|
||||
return (-1 < RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type())) && (RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type()) < RULE_TYPE_PRIORITIES.indexOf(
|
||||
MANUAL_TYPE));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a modified instance of {@link MatchedRule} based on its applied status.
|
||||
* If the rule has been applied, it returns a new {@link MatchedRule} instance that retains all properties of the original
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model;
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
public interface Relation {
|
||||
|
||||
TextEntity getA();
|
||||
|
||||
|
||||
TextEntity getB();
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.drools;
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.drools;
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -4,6 +4,7 @@ import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -11,7 +12,10 @@ import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.collections4.map.HashedMap;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -24,6 +28,10 @@ import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a text entity within a document, characterized by its text range, type, entity type,
|
||||
* and associated metadata like matched rules, pages, and engines.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@ -39,13 +47,14 @@ public class TextEntity implements IEntity {
|
||||
|
||||
TextRange textRange;
|
||||
@Builder.Default
|
||||
List<TextRange> duplicateTextRanges = new ArrayList<>();
|
||||
Set<TextRange> duplicateTextRanges = new HashSet<>();
|
||||
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
||||
final EntityType entityType;
|
||||
|
||||
@Builder.Default
|
||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
final ManualChangeOverwrite manualOverwrite;
|
||||
@Builder.Default
|
||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
boolean dictionaryEntry;
|
||||
boolean dossierDictionaryEntry;
|
||||
@ -64,6 +73,12 @@ public class TextEntity implements IEntity {
|
||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||
SemanticNode deepestFullyContainingNode;
|
||||
|
||||
@Builder.Default
|
||||
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||
|
||||
|
||||
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
@ -154,12 +169,15 @@ public class TextEntity implements IEntity {
|
||||
|
||||
public void removeFromGraph() {
|
||||
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||
pages.forEach(page -> page.getEntities().remove(this));
|
||||
intersectingNodes = new LinkedList<>();
|
||||
relations.keySet()
|
||||
.forEach(entity -> entity.getRelations().remove(this));
|
||||
relations = new HashedMap<>();
|
||||
deepestFullyContainingNode = null;
|
||||
pages = new HashSet<>();
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
}
|
||||
|
||||
|
||||
@ -194,28 +212,34 @@ public class TextEntity implements IEntity {
|
||||
|
||||
public boolean containedBy(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.containedBy(textEntity.getTextRange()) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> duplicateTextRange.containedBy(textEntity.textRange)) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> textEntity.getDuplicateTextRanges()
|
||||
.stream()
|
||||
.anyMatch(duplicateTextRange::containedBy));
|
||||
return textEntity.contains(this);
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.contains(textEntity.getTextRange()) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> duplicateTextRange.contains(textEntity.textRange)) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> textEntity.getDuplicateTextRanges()
|
||||
.stream()
|
||||
.anyMatch(duplicateTextRange::contains));
|
||||
if (this.textRange.contains(textEntity.getTextRange())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Set<TextRange> textEntityDuplicateRanges = textEntity.getDuplicateTextRanges();
|
||||
|
||||
for (TextRange duplicateTextRange : this.duplicateTextRanges) {
|
||||
if (duplicateTextRange.contains(textEntity.getTextRange())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (TextRange otherRange : textEntityDuplicateRanges) {
|
||||
if (duplicateTextRange.contains(otherRange)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public boolean intersects(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.intersects(textEntity.getTextRange()) //
|
||||
@ -240,6 +264,20 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
public void addManualChange(BaseAnnotation manualChange) {
|
||||
|
||||
manualOverwrite.addChange(manualChange);
|
||||
notifyEntityUpdated();
|
||||
}
|
||||
|
||||
|
||||
public void addManualChanges(List<BaseAnnotation> manualChanges) {
|
||||
|
||||
manualOverwrite.addChanges(manualChanges);
|
||||
notifyEntityUpdated();
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getPositionsOnPagePerPage().stream()
|
||||
@ -278,6 +316,21 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return true when this entity is of EntityType ENTITY or HINT
|
||||
*/
|
||||
public boolean validEntityType() {
|
||||
|
||||
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
||||
}
|
||||
|
||||
|
||||
public boolean valid() {
|
||||
|
||||
return active() && validEntityType();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String value() {
|
||||
|
||||
@ -285,4 +338,42 @@ public class TextEntity implements IEntity {
|
||||
.orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void addEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.add(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void removeEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.remove(listener);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void computeRelations() {
|
||||
|
||||
for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) {
|
||||
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
||||
} else if (textEntity.containedBy(this)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
||||
} else if (this.containedBy(textEntity)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
} else {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -9,7 +9,6 @@ import java.util.Set;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -10,6 +10,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -38,7 +39,6 @@ public class Document extends AbstractSemanticNode {
|
||||
@Builder.Default
|
||||
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
|
||||
@ -63,8 +63,8 @@ public class Document extends AbstractSemanticNode {
|
||||
*
|
||||
* @return A list of main sections within the document
|
||||
* @deprecated This method is marked for removal.
|
||||
* Use {@link #streamChildrenOfType(NodeType)} instead,
|
||||
* or {@link #getChildrenOfTypeSectionOrSuperSection()} which returns children of type SECTION as well as SUPER_SECTION.
|
||||
* Use {@link #streamChildrenOfType(NodeType)} instead,
|
||||
* or {@link #getChildrenOfTypeSectionOrSuperSection()} which returns children of type SECTION as well as SUPER_SECTION.
|
||||
*/
|
||||
@Deprecated(forRemoval = true)
|
||||
public List<Section> getMainSections() {
|
||||
@ -168,4 +168,11 @@ public class Document extends AbstractSemanticNode {
|
||||
return bBox;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,19 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -67,4 +58,11 @@ public class Footer extends AbstractSemanticNode {
|
||||
return getTreeId() + ": " + NodeType.FOOTER + ": " + leafTextBlock.buildSummary();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,19 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -70,4 +61,11 @@ public class Header extends AbstractSemanticNode {
|
||||
return getTreeId() + ": " + NodeType.HEADER + ": " + leafTextBlock.buildSummary();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,20 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -71,6 +62,11 @@ public class Headline extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extracts the SectionIdentifier from the text of this headline.
|
||||
*
|
||||
* @return The SectionIdentifier, with which the headline starts.
|
||||
*/
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
@ -104,4 +100,11 @@ public class Headline extends AbstractSemanticNode {
|
||||
.isPresent();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,24 +1,22 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -30,8 +28,7 @@ import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
/**
|
||||
*
|
||||
Represents an image within the document.
|
||||
* Represents an image within the document.
|
||||
*/
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@ -43,6 +40,7 @@ public class Image extends AbstractSemanticNode implements IEntity {
|
||||
|
||||
String id;
|
||||
|
||||
String representationHash;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
ImageType imageType;
|
||||
@ -57,6 +55,9 @@ public class Image extends AbstractSemanticNode implements IEntity {
|
||||
|
||||
Page page;
|
||||
|
||||
@Builder.Default
|
||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
@ -79,6 +80,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
public boolean isFullPageImage() {
|
||||
|
||||
return imageType.equals(ImageType.OCR) || getArea() >= 0.5 * page.getArea();
|
||||
}
|
||||
|
||||
|
||||
private double getArea() {
|
||||
|
||||
return position.getWidth() * position.getHeight();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextRange getTextRange() {
|
||||
|
||||
@ -93,17 +106,33 @@ public class Image extends AbstractSemanticNode implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void addEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.add(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void removeEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.remove(listener);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String type() {
|
||||
|
||||
return getManualOverwrite().getType().orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
|
||||
return getManualOverwrite().getType()
|
||||
.orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return getTreeId() + ": " + getValue() + " " + position;
|
||||
return getTreeId() + ": " + getValue() + " [%.2f,%.2f,%.2f,%.2f]".formatted(position.getX(), position.getY(), position.getWidth(), position.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@ -154,4 +183,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
|
||||
return (area / calculatedIntersection) > containmentThreshold;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isLeaf() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
/**
 * The layout engine kinds known to the document model: {@link #ALGORITHM}, {@link #AI} and {@link #OUTLINE}.
 * NOTE(review): presumably names the engine that produced a layout element - confirm against the usages.
 */
public enum LayoutEngine {

    ALGORITHM,
    AI,
    OUTLINE

}
|
||||
@ -12,7 +12,9 @@ public enum NodeType {
|
||||
TABLE_CELL,
|
||||
IMAGE,
|
||||
HEADER,
|
||||
FOOTER;
|
||||
FOOTER,
|
||||
TABLE_OF_CONTENTS,
|
||||
TABLE_OF_CONTENTS_ITEM;
|
||||
|
||||
|
||||
public String toString() {
|
||||
@ -0,0 +1,127 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a single page in a document.
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Page {
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
Integer number;
|
||||
Integer height;
|
||||
Integer width;
|
||||
Integer rotation;
|
||||
|
||||
List<AtomicTextBlock> textBlocksOnPage;
|
||||
Header header;
|
||||
Footer footer;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
@Builder.Default
|
||||
Set<Image> images = new HashSet<>();
|
||||
|
||||
|
||||
/**
|
||||
* Constructs and returns a {@link TextBlock} representing the concatenated text of all leaf semantic nodes in the main body.
|
||||
*
|
||||
* @return The main body text block.
|
||||
*/
|
||||
public TextBlock getMainBodyTextBlock() {
|
||||
|
||||
return textBlocksOnPage.stream()
|
||||
.filter(atb -> !atb.isEmpty())
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the highest SemanticNodes, which appear only on this page. It is achieved by traversing the DocumentTree up, until a SemanticNode's direct parent is no longer exclusively on this page.
|
||||
*
|
||||
* @return A list which contains the highes SemanticNodes, which appear only on this page.
|
||||
*/
|
||||
public List<SemanticNode> getMainBody() {
|
||||
|
||||
return textBlocksOnPage.stream()
|
||||
.map(AtomicTextBlock::getParent)
|
||||
.map(this::getHighestParentOnlyOnPage)
|
||||
.distinct()
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the highest SemanticNodes which are present on the page. There might be multiples, as two or more Main Sections start on a page.
|
||||
* This is achieved by traversing up the document tree and returning all SemanticNodes whose direct parent is the Document
|
||||
*
|
||||
* @return A list of the highest SemanticNodes present on this page
|
||||
*/
|
||||
public Stream<SemanticNode> streamHighestSemanticNodesOnPage() {
|
||||
|
||||
return textBlocksOnPage.stream()
|
||||
.map(AtomicTextBlock::getParent)
|
||||
.map(this::getHighestSemanticNodeOnPage)
|
||||
.distinct();
|
||||
}
|
||||
|
||||
|
||||
private SemanticNode getHighestParentOnlyOnPage(SemanticNode node) {
|
||||
|
||||
SemanticNode currentNode = node;
|
||||
while (currentNode.hasParent() && currentNode.getParent().onlyOnPage(this)) {
|
||||
currentNode = currentNode.getParent();
|
||||
}
|
||||
return currentNode;
|
||||
}
|
||||
|
||||
|
||||
private SemanticNode getHighestSemanticNodeOnPage(SemanticNode node) {
|
||||
|
||||
SemanticNode currentNode = node;
|
||||
while (currentNode.hasParent() //
|
||||
&& !currentNode.getParent().getType().equals(NodeType.DOCUMENT)) {
|
||||
currentNode = currentNode.getParent();
|
||||
}
|
||||
return currentNode;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return "Page: " + number;
|
||||
}
|
||||
|
||||
|
||||
public double getArea() {
|
||||
|
||||
return width * height;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,19 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
@ -59,4 +50,11 @@ public class Paragraph extends AbstractSemanticNode {
|
||||
return getTreeId() + ": " + NodeType.PARAGRAPH + ": " + leafTextBlock.buildSummary();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Hands this Paragraph to the given visitor by calling {@code visitor.visit(this)}.
 * This method itself does not iterate over any child nodes.
 *
 * @param visitor the NodeVisitor that receives this node
 */
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,23 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -33,7 +21,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
|
||||
public class Section extends AbstractSemanticNode {
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
|
||||
@ -53,6 +40,11 @@ public class Section extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the SectionIdentifier from the headline obtained by the getHeadline() method.
|
||||
*
|
||||
* @return the SectionIdentifier of the associated Headline
|
||||
*/
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
@ -60,7 +52,6 @@ public class Section extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -101,4 +92,10 @@ public class Section extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Hands this Section to the given visitor by calling {@code visitor.visit(this)}.
 * This method itself does not iterate over any child nodes.
 *
 * @param visitor the NodeVisitor that receives this node
 */
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -3,29 +3,36 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a unique identifier for a section within a document.
|
||||
* Represents the textual identifier sometimes present in a Headline. For example, given the headline 3.1 Results, the section identifier is 3.1.
|
||||
* Keep in mind that this identifier is not necessarily unique within a single document, as multiple headlines may start with the same textual identifier.
|
||||
*/
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class SectionIdentifier {
|
||||
|
||||
static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
|
||||
public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d{1,2})(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?");
|
||||
public static Pattern alphanumericIdentifierPattern = Pattern.compile("^[\\s]?[A-Za-z][\\s.,;]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?");
|
||||
|
||||
private enum Format {
|
||||
protected enum Format {
|
||||
EMPTY,
|
||||
NUMERICAL,
|
||||
ALPHANUMERIC,
|
||||
DOCUMENT
|
||||
}
|
||||
|
||||
@Getter
|
||||
Format format;
|
||||
@Getter
|
||||
String identifierString;
|
||||
List<Integer> identifiers;
|
||||
boolean asChild;
|
||||
@ -47,6 +54,10 @@ public class SectionIdentifier {
|
||||
if (numericalIdentifierMatcher.find()) {
|
||||
return buildNumericalSectionIdentifier(headline, numericalIdentifierMatcher);
|
||||
}
|
||||
Matcher alphanumericIdentifierMatcher = alphanumericIdentifierPattern.matcher(headline);
|
||||
if (alphanumericIdentifierMatcher.find()) {
|
||||
return buildAlphanumericSectionIdentifier(headline, alphanumericIdentifierMatcher);
|
||||
}
|
||||
// more formats here
|
||||
return SectionIdentifier.empty();
|
||||
}
|
||||
@ -105,8 +116,34 @@ public class SectionIdentifier {
|
||||
}
|
||||
|
||||
|
||||
private static SectionIdentifier buildAlphanumericSectionIdentifier(String headline, Matcher alphanumericIdentifierMatcher) {
|
||||
|
||||
String identifierString = headline.substring(alphanumericIdentifierMatcher.start(), alphanumericIdentifierMatcher.end());
|
||||
|
||||
String alphanumericIdentifier = alphanumericIdentifierMatcher.group(0).substring(0, 1).toUpperCase(Locale.ENGLISH);
|
||||
int mappedCharacterValue = alphanumericIdentifier.charAt(0) - 'A' + 1;
|
||||
List<Integer> identifiers = new LinkedList<>();
|
||||
identifiers.add(mappedCharacterValue);
|
||||
|
||||
for (int i = 1; i <= 3; i++) {
|
||||
String numericalIdentifier = alphanumericIdentifierMatcher.group(i);
|
||||
if (numericalIdentifier == null || numericalIdentifier.equals("0") || numericalIdentifier.isEmpty() || numericalIdentifier.isBlank()) {
|
||||
break;
|
||||
}
|
||||
identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
|
||||
}
|
||||
|
||||
return new SectionIdentifier(Format.ALPHANUMERIC,
|
||||
// Changed format to reflect alphanumeric
|
||||
identifierString,
|
||||
identifiers.stream()
|
||||
.toList(),
|
||||
false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the current section is the parent of the given section.
|
||||
* Determines if the current SectionIdentifier is the parent of the given SectionIdentifier.
|
||||
*
|
||||
* @param sectionIdentifier The section identifier to compare against.
|
||||
* @return true if the current section is the parent of the given section, false otherwise.
|
||||
@ -155,4 +192,30 @@ public class SectionIdentifier {
|
||||
return identifierString;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Checks whether the format of this SectionIdentifier is {@code Format.EMPTY}.
 *
 * @return true, when no identifier could be found
 */
|
||||
public boolean isEmpty() {
|
||||
|
||||
return this.format.equals(Format.EMPTY);
|
||||
}
|
||||
|
||||
|
||||
/**
 * The level of a SectionIdentifier corresponds to the number of identifiers it holds, e.g. 1.1 is level 2 and 1. is level 1.
 *
 * @return the level of the Headline, i.e. the size of the identifiers list
 */
|
||||
public int level() {
|
||||
|
||||
return identifiers.size();
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return the list of numeric identifiers held by this SectionIdentifier (the stored list itself, not a copy)
 */
protected List<Integer> getIdentifiers() {
|
||||
|
||||
return identifiers;
|
||||
}
|
||||
|
||||
}
|
||||
@ -15,15 +15,16 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
public interface SemanticNode {
|
||||
|
||||
@ -50,6 +51,23 @@ public interface SemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Searches all Nodes located underneath this Node in the DocumentTree that are found on the given pages.
|
||||
* Then consecutive AtomicTextBlocks are concatenated where possible and the list of the resulting TextBlocks is returned.
|
||||
*
|
||||
* @return List of TextBlocks containing all AtomicTextBlocks that are located under this Node on the given pages.
|
||||
*/
|
||||
default List<TextBlock> getTextBlocksByPageNumbers(Set<Integer> pageNumbers) {
|
||||
|
||||
return streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getTextBlock)
|
||||
.map(TextBlock::getAtomicTextBlocks)
|
||||
.flatMap(List::stream)
|
||||
.filter(atb -> pageNumbers.contains(atb.getPage().getNumber()))
|
||||
.collect(new ConsecutiveTextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Any Node maintains its own Set of Entities.
|
||||
* This Set contains all Entities whose TextRange intersects the TextRange of this node.
|
||||
@ -59,6 +77,20 @@ public interface SemanticNode {
|
||||
Set<TextEntity> getEntities();
|
||||
|
||||
|
||||
/**
|
||||
* A view of the Entity Set of this SemanticNode including only the active (APPLIED or SKIPPED) Entities which are of a valid type (ENTITY or HINT).
|
||||
* This is used for all functions, which check for the existence of an Entity, such as hasEntityOfType().
|
||||
*
|
||||
* @return Stream of valid TextEntities
|
||||
*/
|
||||
default Stream<TextEntity> streamValidEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(IEntity::active)
|
||||
.filter(TextEntity::validEntityType);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
|
||||
*
|
||||
@ -149,7 +181,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Returns a SectionIdentifier, such that it acts as a child of the first Headline associated with this SemanticNode.
|
||||
* Returns the SectionIdentifier as a child of the SectionIdentifier returned by the getHeadline() method.
|
||||
*
|
||||
* @return The SectionIdentifier from the first Headline.
|
||||
*/
|
||||
@ -259,9 +291,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfType(String type) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.anyMatch(redactionEntity -> redactionEntity.type().equals(type));
|
||||
return streamValidEntities().anyMatch(redactionEntity -> redactionEntity.type().equals(type));
|
||||
}
|
||||
|
||||
|
||||
@ -274,10 +304,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfAnyType(String... types) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.anyMatch(redactionEntity -> Arrays.stream(types)
|
||||
.anyMatch(type -> redactionEntity.type().equals(type)));
|
||||
return streamValidEntities().anyMatch(redactionEntity -> Arrays.stream(types)
|
||||
.anyMatch(type -> redactionEntity.type().equals(type)));
|
||||
}
|
||||
|
||||
|
||||
@ -290,9 +318,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfAllTypes(String... types) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.map(TextEntity::type)
|
||||
return streamValidEntities().map(TextEntity::type)
|
||||
.collect(Collectors.toUnmodifiableSet())
|
||||
.containsAll(Arrays.stream(types)
|
||||
.toList());
|
||||
@ -301,31 +327,28 @@ public interface SemanticNode {
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
* Ignores entities that are not active or not of a valid entity type.
|
||||
*
|
||||
* @param type string representing the type of entities to return
|
||||
* @return List of RedactionEntities of the given type
|
||||
*/
|
||||
default List<TextEntity> getEntitiesOfType(String type) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.filter(redactionEntity -> redactionEntity.type().equals(type))
|
||||
return streamValidEntities().filter(redactionEntity -> redactionEntity.type().equals(type))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
* Ignores Entity that are not valid.
|
||||
*
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities of any provided type
|
||||
*/
|
||||
default List<TextEntity> getEntitiesOfType(List<String> types) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
return streamValidEntities()//
|
||||
.filter(redactionEntity -> redactionEntity.isAnyType(types))
|
||||
.toList();
|
||||
}
|
||||
@ -333,15 +356,14 @@ public interface SemanticNode {
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which have any of the provided types.
|
||||
* Ignores Entity with the ignored flag set to true or the removed flag set to true.
|
||||
* Ignores Entity that are not valid.
|
||||
*
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities that match any of the provided types
|
||||
*/
|
||||
default List<TextEntity> getEntitiesOfType(String... types) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
return streamValidEntities()//
|
||||
.filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types)
|
||||
.toList()))
|
||||
.toList();
|
||||
@ -445,7 +467,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsStringIgnoreCase(String string) {
|
||||
|
||||
return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT));
|
||||
return getTextBlock().getSearchTextLowerCase().contains(string.toLowerCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
|
||||
@ -756,13 +778,12 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* TODO: this produces unwanted results for sections spanning multiple columns.
|
||||
* Computes the Union of the bounding boxes of all children recursively.
|
||||
*
|
||||
* @return The union of the BoundingBoxes of all children
|
||||
*/
|
||||
private Map<Page, Rectangle2D> getBBoxFromChildren() {
|
||||
|
||||
//TODO: this produces unwanted results for sections spanning multiple columns.
|
||||
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
|
||||
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox)
|
||||
.toList();
|
||||
@ -797,15 +818,24 @@ public interface SemanticNode {
|
||||
|
||||
/**
|
||||
* Accepts a {@link NodeVisitor} and initiates a depth-first traversal of the semantic tree rooted at this node.
|
||||
* The visitor's {@link NodeVisitor#visit(SemanticNode)} method is invoked for each node encountered during the traversal.
|
||||
* The visitor's {@link NodeVisitor#visit} method is invoked for each node encountered during the traversal.
|
||||
*
|
||||
* @param visitor The {@link NodeVisitor} to accept and apply during the traversal.
|
||||
* @see NodeVisitor
|
||||
*/
|
||||
default void accept(NodeVisitor visitor) {
|
||||
void accept(NodeVisitor visitor);
|
||||
|
||||
visitor.visit(this);
|
||||
streamChildren().forEach(childNode -> childNode.accept(visitor));
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode appears on a single page only, and if that page is the provided one.
|
||||
*
|
||||
* @param page the page to check
|
||||
* @return true, when SemanticNode is on a single page only and the page is the provided page. Otherwise, false.
|
||||
*/
|
||||
default boolean onlyOnPage(Page page) {
|
||||
|
||||
Set<Page> pages = getPages();
|
||||
return pages.size() == 1 && pages.contains(page);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,8 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
@ -39,6 +40,11 @@ public class SuperSection extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the SectionIdentifier from the headline obtained by the getHeadline() method.
|
||||
*
|
||||
* @return the SectionIdentifier of the associated Headline
|
||||
*/
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
@ -46,7 +52,6 @@ public class SuperSection extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -87,4 +92,10 @@ public class SuperSection extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Hands this SuperSection to the given visitor by calling {@code visitor.visit(this)}.
 * This method itself does not iterate over any child nodes.
 *
 * @param visitor the NodeVisitor that receives this node
 */
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -14,10 +14,10 @@ import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -74,8 +74,7 @@ public class Table implements SemanticNode {
|
||||
return IntStream.range(0, numberOfRows).boxed()
|
||||
.filter(row -> rowContainsStringsIgnoreCase(row, strings))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.flatMap(Collection::stream);
|
||||
.flatMap(TableCell::streamValidEntities);
|
||||
}
|
||||
|
||||
|
||||
@ -135,11 +134,11 @@ public class Table implements SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
* Streams all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types.
|
||||
*/
|
||||
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
|
||||
|
||||
@ -192,30 +191,26 @@ public class Table implements SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Streams all Entities in the given row.
|
||||
* Streams all valid Entities in the given row.
|
||||
*
|
||||
* @param rowNumber the row number to look for
|
||||
* @return stream of TextEntities occurring in row
|
||||
*/
|
||||
public Stream<TextEntity> streamTextEntitiesInRow(int rowNumber) {
|
||||
|
||||
return streamRow(rowNumber).map(TableCell::getEntities)
|
||||
.flatMap(Collection::stream)
|
||||
.filter(TextEntity::active);
|
||||
return streamRow(rowNumber).flatMap(TableCell::streamValidEntities);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Streams all Entities in the given col.
|
||||
* Streams all valid Entities in the given col.
|
||||
*
|
||||
* @param colNumber the column number to look for
|
||||
* @return stream of TextEntities occurring in column
|
||||
*/
|
||||
public Stream<TextEntity> streamTextEntitiesInCol(int colNumber) {
|
||||
|
||||
return streamCol(colNumber).map(TableCell::getEntities)
|
||||
.flatMap(Collection::stream)
|
||||
.filter(TextEntity::active);
|
||||
return streamCol(colNumber).flatMap(TableCell::streamValidEntities);
|
||||
}
|
||||
|
||||
|
||||
@ -269,6 +264,7 @@ public class Table implements SemanticNode {
|
||||
|
||||
return streamHeaders().filter(tableCellNode -> tableCellNode.getTextBlock().getSearchText().contains(header))
|
||||
.map(TableCell::getCol)
|
||||
.distinct()
|
||||
.flatMap(this::streamCol)
|
||||
.filter(tableCellNode -> !tableCellNode.isHeader());
|
||||
}
|
||||
@ -423,4 +419,11 @@ public class Table implements SemanticNode {
|
||||
return treeId.toString() + ": " + NodeType.TABLE + ": #cols: " + numberOfCols + ", #rows: " + numberOfRows + ", " + this.getTextBlock().buildSummary();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Hands this Table to the given visitor by calling {@code visitor.visit(this)}.
 * This method itself does not iterate over any child nodes.
 *
 * @param visitor the NodeVisitor that receives this node
 */
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -2,20 +2,14 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
@ -42,7 +36,6 @@ public class TableCell extends AbstractSemanticNode {
|
||||
TextBlock textBlock;
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Map<Page, Rectangle2D> getBBox() {
|
||||
|
||||
@ -88,4 +81,11 @@ public class TableCell extends AbstractSemanticNode {
|
||||
return getTreeId() + ": " + NodeType.TABLE_CELL + ": " + this.getTextBlock().buildSummary();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Hands this TableCell to the given visitor by calling {@code visitor.visit(this)}.
 * This method itself does not iterate over any child nodes.
 *
 * @param visitor the NodeVisitor that receives this node
 */
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,47 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class TableOfContents extends AbstractSemanticNode {
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
|
||||
return NodeType.TABLE_OF_CONTENTS;
|
||||
}
|
||||
|
||||
|
||||
public Headline getHeadline() {
|
||||
|
||||
return streamChildrenOfType(NodeType.HEADLINE).map(node -> (Headline) node)
|
||||
.findFirst()
|
||||
.orElseGet(() -> getParent().getHeadline());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return getTreeId() + ": " + NodeType.TABLE_OF_CONTENTS_ITEM + ": " + getTextBlock().buildSummary();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,57 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class TableOfContentsItem extends AbstractSemanticNode {
|
||||
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
|
||||
return NodeType.TABLE_OF_CONTENTS_ITEM;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isLeaf() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void accept(NodeVisitor visitor) {
|
||||
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextBlock getTextBlock() {
|
||||
|
||||
return leafTextBlock;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return getTreeId() + ": " + NodeType.TABLE_OF_CONTENTS_ITEM + ": " + leafTextBlock.buildSummary();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,11 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData;
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.lang.ref.SoftReference;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
@ -15,12 +16,12 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData.Position;
|
||||
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.DocumentTextData;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -28,6 +29,7 @@ import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@ -36,19 +38,33 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class AtomicTextBlock implements TextBlock {
|
||||
|
||||
@NonNull
|
||||
Long id;
|
||||
@NonNull
|
||||
Integer numberOnPage;
|
||||
@NonNull
|
||||
Page page;
|
||||
|
||||
//string coordinates
|
||||
@NonNull
|
||||
TextRange textRange;
|
||||
@NonNull
|
||||
String searchText;
|
||||
List<String> words;
|
||||
@NonNull
|
||||
List<Integer> lineBreaks;
|
||||
@NonNull
|
||||
List<TextRange> italicTextRanges;
|
||||
@NonNull
|
||||
List<TextRange> boldTextRanges;
|
||||
|
||||
SoftReference<String> searchTextLowerCaseCache;
|
||||
SoftReference<List<String>> wordsCache;
|
||||
|
||||
//position coordinates
|
||||
@NonNull
|
||||
List<Integer> stringIdxToPositionIdx;
|
||||
@Getter
|
||||
@NonNull
|
||||
List<Rectangle2D> positions;
|
||||
|
||||
@EqualsAndHashCode.Exclude
|
||||
@ -74,6 +90,8 @@ public class AtomicTextBlock implements TextBlock {
|
||||
.stringIdxToPositionIdx(Collections.emptyList())
|
||||
.positions(Collections.emptyList())
|
||||
.parent(parent)
|
||||
.boldTextRanges(Collections.emptyList())
|
||||
.italicTextRanges(Collections.emptyList())
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -86,20 +104,26 @@ public class AtomicTextBlock implements TextBlock {
|
||||
.page(page)
|
||||
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
|
||||
.searchText(atomicTextBlockData.getSearchText())
|
||||
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed()
|
||||
.toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed()
|
||||
.toList())
|
||||
.positions(toRectangle2DList(atomicPositionBlockData.getPositions()))
|
||||
.lineBreaks(atomicTextBlockData.getLineBreaksList())
|
||||
.stringIdxToPositionIdx(atomicPositionBlockData.getStringIdxToPositionIdxList())
|
||||
.positions(toRectangle2DList(atomicPositionBlockData.getPositionsList()))
|
||||
.italicTextRanges(atomicTextBlockData.getItalicTextRangesList()
|
||||
.stream()
|
||||
.map(r -> new TextRange(r.getStart(), r.getEnd()))
|
||||
.toList())
|
||||
.boldTextRanges(atomicTextBlockData.getBoldTextRangesList()
|
||||
.stream()
|
||||
.map(r -> new TextRange(r.getStart(), r.getEnd()))
|
||||
.toList())
|
||||
.parent(parent)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private static List<Rectangle2D> toRectangle2DList(float[][] positions) {
|
||||
private static List<Rectangle2D> toRectangle2DList(List<Position> positions) {
|
||||
|
||||
return Arrays.stream(positions)
|
||||
.map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3]))
|
||||
return positions.stream()
|
||||
.map(pos -> (Rectangle2D) new Rectangle2D.Float(pos.getValue(0), pos.getValue(1), pos.getValue(2), pos.getValue(3)))
|
||||
.toList();
|
||||
}
|
||||
|
||||
@ -121,8 +145,31 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getSearchTextLowerCase() {
|
||||
|
||||
String text = null;
|
||||
if (searchTextLowerCaseCache != null) {
|
||||
text = searchTextLowerCaseCache.get();
|
||||
}
|
||||
|
||||
if (text == null) {
|
||||
text = getSearchText().toLowerCase(Locale.ENGLISH);
|
||||
searchTextLowerCaseCache = new SoftReference<>(text);
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
|
||||
public List<String> getWords() {
|
||||
|
||||
List<String> words = null;
|
||||
|
||||
if (wordsCache != null) {
|
||||
words = wordsCache.get();
|
||||
}
|
||||
|
||||
if (words == null) {
|
||||
words = new ArrayList<>();
|
||||
BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
|
||||
@ -131,6 +178,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
|
||||
words.add(searchText.substring(start, end));
|
||||
}
|
||||
wordsCache = new SoftReference<>(words);
|
||||
}
|
||||
return words;
|
||||
}
|
||||
@ -3,11 +3,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock;
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.lang.ref.SoftReference;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@ -16,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@ -25,6 +28,7 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
List<AtomicTextBlock> atomicTextBlocks;
|
||||
String searchText;
|
||||
TextRange textRange;
|
||||
SoftReference<String> searchTextLowerCaseCache;
|
||||
|
||||
|
||||
public static ConcatenatedTextBlock empty() {
|
||||
@ -100,6 +104,23 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getSearchTextLowerCase() {
|
||||
|
||||
String text = null;
|
||||
if (searchTextLowerCaseCache != null) {
|
||||
text = searchTextLowerCaseCache.get();
|
||||
}
|
||||
|
||||
if (text == null) {
|
||||
text = getSearchText().toLowerCase(Locale.ENGLISH);
|
||||
searchTextLowerCaseCache = new SoftReference<>(text);
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getWords() {
|
||||
|
||||
@ -142,6 +163,26 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<TextRange> getItalicTextRanges() {
|
||||
|
||||
return getAtomicTextBlocks().stream()
|
||||
.flatMap(atomicTextBlock -> atomicTextBlock.getItalicTextRanges()
|
||||
.stream())
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<TextRange> getBoldTextRanges() {
|
||||
|
||||
return getAtomicTextBlocks().stream()
|
||||
.flatMap(atomicTextBlock -> atomicTextBlock.getBoldTextRanges()
|
||||
.stream())
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Rectangle2D getPosition(int stringIdx) {
|
||||
|
||||
@ -259,6 +300,7 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
@NonNull
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -0,0 +1,72 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.BinaryOperator;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collector;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@NoArgsConstructor
|
||||
public class ConsecutiveTextBlockCollector implements Collector<TextBlock, List<ConcatenatedTextBlock>, List<TextBlock>> {
|
||||
|
||||
@Override
|
||||
public Supplier<List<ConcatenatedTextBlock>> supplier() {
|
||||
|
||||
return LinkedList::new;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BiConsumer<List<ConcatenatedTextBlock>, TextBlock> accumulator() {
|
||||
|
||||
return (existingList, textBlock) -> {
|
||||
if (existingList.isEmpty()) {
|
||||
ConcatenatedTextBlock ctb = ConcatenatedTextBlock.empty();
|
||||
ctb.concat(textBlock);
|
||||
existingList.add(ctb);
|
||||
return;
|
||||
}
|
||||
|
||||
ConcatenatedTextBlock prevBlock = existingList.get(existingList.size() - 1);
|
||||
|
||||
if (prevBlock.getTextRange().end() == textBlock.getTextRange().start()) {
|
||||
prevBlock.concat(textBlock);
|
||||
} else {
|
||||
ConcatenatedTextBlock ctb = ConcatenatedTextBlock.empty();
|
||||
ctb.concat(textBlock);
|
||||
existingList.add(ctb);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BinaryOperator<List<ConcatenatedTextBlock>> combiner() {
|
||||
|
||||
return (list1, list2) -> Stream.concat(list1.stream(), list2.stream())
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Function<List<ConcatenatedTextBlock>, List<TextBlock>> finisher() {
|
||||
|
||||
return a -> a.stream()
|
||||
.map(tb -> (TextBlock) tb)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Set<Characteristics> characteristics() {
|
||||
|
||||
return Set.of(Characteristics.IDENTITY_FINISH);
|
||||
}
|
||||
|
||||
}
|
||||
@ -19,6 +19,9 @@ public interface TextBlock extends CharSequence {
|
||||
String getSearchText();
|
||||
|
||||
|
||||
String getSearchTextLowerCase();
|
||||
|
||||
|
||||
List<String> getWords();
|
||||
|
||||
|
||||
@ -52,6 +55,12 @@ public interface TextBlock extends CharSequence {
|
||||
String subSequenceWithLineBreaks(TextRange textRange);
|
||||
|
||||
|
||||
List<TextRange> getItalicTextRanges();
|
||||
|
||||
|
||||
List<TextRange> getBoldTextRanges();
|
||||
|
||||
|
||||
int numberOfLines();
|
||||
|
||||
|
||||
@ -1,23 +1,27 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service.document;
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class EntityCreationUtility {
|
||||
|
||||
public static void checkIfBothStartAndEndAreEmpty(String start, String end) {
|
||||
public void checkIfBothStartAndEndAreEmpty(String start, String end) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(List.of(start), List.of(end));
|
||||
}
|
||||
|
||||
|
||||
public static <T> void checkIfBothStartAndEndAreEmpty(List<T> start, List<T> end) {
|
||||
public <T> void checkIfBothStartAndEndAreEmpty(List<T> start, List<T> end) {
|
||||
|
||||
if ((start == null || start.isEmpty()) && (end == null || end.isEmpty())) {
|
||||
throw new IllegalArgumentException("Start and end values are empty!");
|
||||
@ -25,7 +29,7 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public static int truncateEndIfLineBreakIsBetween(int end, int expandedEnd, TextBlock textBlock) {
|
||||
public int truncateEndIfLineBreakIsBetween(int end, int expandedEnd, TextBlock textBlock) {
|
||||
|
||||
if (textBlock.getNextLinebreak(end) < expandedEnd) {
|
||||
return end;
|
||||
@ -34,7 +38,7 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public static Set<SemanticNode> findIntersectingSubNodes(SemanticNode initialIntersectingNode, TextRange textRange) {
|
||||
public Set<SemanticNode> findIntersectingSubNodes(SemanticNode initialIntersectingNode, TextRange textRange) {
|
||||
|
||||
IntersectingNodeVisitor visitor = new IntersectingNodeVisitor(textRange);
|
||||
|
||||
@ -46,7 +50,7 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public static void addToPages(TextEntity entity) {
|
||||
public void addToPages(TextEntity entity) {
|
||||
|
||||
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
|
||||
entity.getPages().addAll(pages);
|
||||
@ -54,14 +58,14 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public static void addEntityToNodeEntitySets(TextEntity entity) {
|
||||
public void addEntityToNodeEntitySets(TextEntity entity) {
|
||||
|
||||
entity.getIntersectingNodes()
|
||||
.forEach(node -> node.getEntities().add(entity));
|
||||
}
|
||||
|
||||
|
||||
public static boolean allEntitiesIntersectAndHaveSameTypes(List<TextEntity> entitiesToMerge) {
|
||||
public boolean allEntitiesIntersectAndHaveSameTypes(List<TextEntity> entitiesToMerge) {
|
||||
|
||||
if (entitiesToMerge.isEmpty()) {
|
||||
return true;
|
||||
@ -79,7 +83,7 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public static TextRange toLineAfterTextRange(TextBlock textBlock, TextRange textRange) {
|
||||
public TextRange toLineAfterTextRange(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
if (textBlock.getTextRange().end() == textRange.end()) {
|
||||
return new TextRange(textRange.end(), textRange.end());
|
||||
@ -1,39 +1,36 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service.document;
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@UtilityClass
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class EntityEnrichmentService {
|
||||
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
int SURROUNDING_WORDS_OFFSET_WINDOW = 100;
|
||||
int NUMBER_OF_SURROUNDING_WORDS = 3;
|
||||
|
||||
|
||||
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
||||
|
||||
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
||||
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
||||
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
||||
}
|
||||
|
||||
|
||||
private String findTextAfter(int index, TextBlock textBlock) {
|
||||
|
||||
int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().end());
|
||||
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
|
||||
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
||||
if (!textAfter.isBlank()) {
|
||||
List<String> wordsAfter = splitToWordsAndRemoveEmptyWords(textAfter);
|
||||
int numberOfWordsAfter = Math.min(wordsAfter.size(), redactionServiceSettings.getNumberOfSurroundingWords());
|
||||
int numberOfWordsAfter = Math.min(wordsAfter.size(), NUMBER_OF_SURROUNDING_WORDS);
|
||||
if (!wordsAfter.isEmpty()) {
|
||||
return concatWordsAfter(wordsAfter.subList(0, numberOfWordsAfter), textAfter.startsWith(" "));
|
||||
}
|
||||
@ -41,14 +38,12 @@ public class EntityEnrichmentService {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
private String findTextBefore(int index, TextBlock textBlock) {
|
||||
|
||||
int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().start());
|
||||
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
|
||||
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
||||
if (!textBefore.isBlank()) {
|
||||
List<String> wordsBefore = splitToWordsAndRemoveEmptyWords(textBefore);
|
||||
int numberOfWordsBefore = Math.min(wordsBefore.size(), redactionServiceSettings.getNumberOfSurroundingWords());
|
||||
int numberOfWordsBefore = Math.min(wordsBefore.size(), NUMBER_OF_SURROUNDING_WORDS);
|
||||
if (!wordsBefore.isEmpty()) {
|
||||
return concatWordsBefore(wordsBefore.subList(wordsBefore.size() - numberOfWordsBefore, wordsBefore.size()), textBefore.endsWith(" "));
|
||||
}
|
||||
@ -56,36 +51,26 @@ public class EntityEnrichmentService {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
private static List<String> splitToWordsAndRemoveEmptyWords(String textAfter) {
|
||||
|
||||
return Arrays.stream(textAfter.split(" "))
|
||||
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
|
||||
return Arrays.stream(text.split(" "))
|
||||
.filter(word -> !Objects.equals("", word))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private static String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
||||
|
||||
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (String word : words) {
|
||||
sb.append(word).append(" ");
|
||||
}
|
||||
|
||||
String result = sb.toString().trim();
|
||||
return endWithSpace ? result + " " : result;
|
||||
}
|
||||
|
||||
|
||||
private static String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
||||
|
||||
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (String word : words) {
|
||||
sb.append(word).append(" ");
|
||||
}
|
||||
|
||||
String result = sb.toString().trim();
|
||||
return startWithSpace ? " " + result : result;
|
||||
}
|
||||
@ -3,12 +3,17 @@ package com.iqser.red.service.redaction.v1.server.utils;
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.geom.RectangularShape;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
@ -18,6 +23,9 @@ import lombok.experimental.UtilityClass;
|
||||
@UtilityClass
|
||||
public class RedactionSearchUtility {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(RedactionSearchUtility.class);
|
||||
|
||||
|
||||
/**
|
||||
* Checks if any part of a CharSequence matches a given regex pattern.
|
||||
*
|
||||
@ -154,14 +162,8 @@ public class RedactionSearchUtility {
|
||||
*/
|
||||
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||
.map(textBlock::getLineTextRange)
|
||||
.filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
|
||||
.toList();
|
||||
if (lineBoundaries.isEmpty()) {
|
||||
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
|
||||
}
|
||||
return TextRange.merge(lineBoundaries);
|
||||
Predicate<TextRange> isWithinYRange = lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary);
|
||||
return filterLineBoundaries(textBlock, isWithinYRange);
|
||||
}
|
||||
|
||||
|
||||
@ -172,6 +174,49 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies all lines within a text block that have roughly the same vertical coordinates.
|
||||
*
|
||||
* @param maxY The maximum Y-coordinate of the vertical range.
|
||||
* @param minY The minimum Y-coordinate of the vertical range.
|
||||
* @param textBlock The text block containing the lines to be checked.
|
||||
* @return A {@link TextRange} encompassing all lines within the specified Y-coordinate range.
|
||||
*/
|
||||
public static TextRange findTextRangesOfAllLinesWithCloseYCoordinates(Double maxY, Double minY, TextBlock textBlock) {
|
||||
|
||||
double averageLineHeight = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||
.map(textBlock::getLineTextRange)
|
||||
.flatMap((TextRange stringTextRange) -> textBlock.getPositions(stringTextRange)
|
||||
.stream())
|
||||
.map(RectangularShape::getHeight)
|
||||
.mapToDouble(Double::doubleValue).average()
|
||||
.orElse(0);
|
||||
Predicate<TextRange> hasCloseYRange = lineBoundary -> areYCoordinatesClose(maxY, minY, textBlock, lineBoundary, averageLineHeight);
|
||||
|
||||
return filterLineBoundaries(textBlock, hasCloseYRange);
|
||||
}
|
||||
|
||||
|
||||
private static boolean areYCoordinatesClose(Double maxY, Double minY, TextBlock textBlock, TextRange lineTextRange, double averageLineHeight) {
|
||||
|
||||
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
|
||||
return Math.abs(lineBBox.getMinY() - minY) <= averageLineHeight && Math.abs(maxY - lineBBox.getMaxY()) <= averageLineHeight;
|
||||
}
|
||||
|
||||
|
||||
private static TextRange filterLineBoundaries(TextBlock textBlock, Predicate<TextRange> textRangePredicate) {
|
||||
|
||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||
.map(textBlock::getLineTextRange)
|
||||
.filter(textRangePredicate)
|
||||
.toList();
|
||||
if (lineBoundaries.isEmpty()) {
|
||||
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
|
||||
}
|
||||
return TextRange.merge(lineBoundaries);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds TextRanges matching a regex pattern within a TextBlock.
|
||||
*
|
||||
@ -264,8 +309,12 @@ public class RedactionSearchUtility {
|
||||
|
||||
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getTextRange()));
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
try {
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
}
|
||||
} catch (StackOverflowError stackOverflowError) {
|
||||
log.warn("Stackoverflow error for pattern {} in text: {}", pattern.pattern(), textBlock);
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
@ -276,8 +325,12 @@ public class RedactionSearchUtility {
|
||||
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
|
||||
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
try {
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
}
|
||||
} catch (StackOverflowError stackOverflowError) {
|
||||
log.warn("Stackoverflow error for pattern {} in text with linebreaks: {}", pattern.pattern(), searchTextWithLineBreaks);
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentPageProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
|
||||
message AllDocumentPages {
|
||||
|
||||
repeated DocumentPage documentPages = 1;
|
||||
}
|
||||
|
||||
message DocumentPage {
|
||||
// The page number, starting with 1.
|
||||
int32 number = 1;
|
||||
|
||||
// The page height in PDF user units.
|
||||
int32 height = 2;
|
||||
|
||||
// The page width in PDF user units.
|
||||
int32 width = 3;
|
||||
|
||||
// The page rotation as specified by the PDF.
|
||||
int32 rotation = 4;
|
||||
}
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentPositionDataProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
message AllDocumentPositionData {
|
||||
|
||||
repeated DocumentPositionData documentPositionData = 1;
|
||||
}
|
||||
|
||||
message DocumentPositionData {
|
||||
// Identifier of the text block.
|
||||
int64 id = 1;
|
||||
|
||||
// For each string coordinate in the search text of the text block, the array contains an entry relating the string coordinate to the position coordinate.
|
||||
// This is required due to the text and position coordinates not being equal.
|
||||
repeated int32 stringIdxToPositionIdx = 2;
|
||||
|
||||
// The bounding box for each glyph as a rectangle. This matrix is of size (n,4), where n is the number of glyphs in the text block.
|
||||
// The second dimension specifies the rectangle with the value x, y, width, height, with x, y specifying the lower left corner.
|
||||
// In order to access this information, the stringIdxToPositionIdx array must be used to transform the coordinates.
|
||||
repeated Position positions = 3;
|
||||
|
||||
// Definition of a BoundingBox that contains x, y, width, and height.
|
||||
message Position {
|
||||
repeated float value = 1;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "DocumentStructureProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
|
||||
import "EntryData.proto";
|
||||
|
||||
message DocumentStructure {
|
||||
// The root EntryData represents the Document.
|
||||
EntryData root = 1;
|
||||
}
|
||||
@ -0,0 +1,40 @@
|
||||
syntax = "proto3";
|
||||
|
||||
import "Range.proto";
|
||||
|
||||
option java_outer_classname = "DocumentTextDataProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
message AllDocumentTextData {
|
||||
|
||||
repeated DocumentTextData documentTextData = 1;
|
||||
}
|
||||
|
||||
message DocumentTextData {
|
||||
// Identifier of the text block.
|
||||
int64 id = 1;
|
||||
|
||||
// The page the text block occurs on.
|
||||
int64 page = 2;
|
||||
|
||||
// The text of the text block.
|
||||
string searchText = 3;
|
||||
|
||||
// Each text block is assigned a number on a page, starting from 0.
|
||||
int32 numberOnPage = 4;
|
||||
|
||||
// The text blocks are ordered, this number represents the start of the text block as a string offset.
|
||||
int32 start = 5;
|
||||
|
||||
// The text blocks are ordered, this number represents the end of the text block as a string offset.
|
||||
int32 end = 6;
|
||||
|
||||
// The line breaks in the text of this semantic node, given as string offsets with exclusive ends. There is an implicit line break at the end of each semantic node.
|
||||
repeated int32 lineBreaks = 7;
|
||||
|
||||
// The text ranges where the text is italic
|
||||
repeated Range italicTextRanges = 8;
|
||||
|
||||
// The text ranges where the text is bold
|
||||
repeated Range boldTextRanges = 9;
|
||||
}
|
||||
@ -0,0 +1,30 @@
|
||||
syntax = "proto3";
|
||||
|
||||
import "LayoutEngine.proto";
|
||||
import "NodeType.proto";
|
||||
|
||||
option java_outer_classname = "EntryDataProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
message EntryData {
|
||||
// Type of the semantic node.
|
||||
NodeType type = 1;
|
||||
|
||||
// Specifies the position in the parsed tree structure.
|
||||
repeated int32 treeId = 2;
|
||||
|
||||
// Specifies the text block IDs associated with this semantic node.
|
||||
repeated int64 atomicBlockIds = 3;
|
||||
|
||||
// Specifies the pages this semantic node appears on.
|
||||
repeated int64 pageNumbers = 4;
|
||||
|
||||
// Some semantic nodes have additional information, this information is stored in this Map.
|
||||
map<string, string> properties = 5;
|
||||
|
||||
// All child Entries of this Entry.
|
||||
repeated EntryData children = 6;
|
||||
|
||||
// Describes the origin of the semantic node.
|
||||
repeated LayoutEngine engines = 7;
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "LayoutEngineProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
enum LayoutEngine {
|
||||
ALGORITHM = 0;
|
||||
AI = 1;
|
||||
OUTLINE = 2;
|
||||
}
|
||||
@ -0,0 +1,19 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "NodeTypeProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
enum NodeType {
|
||||
DOCUMENT = 0;
|
||||
SECTION = 1;
|
||||
SUPER_SECTION = 2;
|
||||
HEADLINE = 3;
|
||||
PARAGRAPH = 4;
|
||||
TABLE = 5;
|
||||
TABLE_CELL = 6;
|
||||
IMAGE = 7;
|
||||
HEADER = 8;
|
||||
FOOTER = 9;
|
||||
TABLE_OF_CONTENTS = 10;
|
||||
TABLE_OF_CONTENTS_ITEM = 11;
|
||||
}
|
||||
14
redaction-service-v1/document/src/main/resources/Range.proto
Normal file
14
redaction-service-v1/document/src/main/resources/Range.proto
Normal file
@ -0,0 +1,14 @@
|
||||
syntax = "proto3";
|
||||
|
||||
option java_outer_classname = "RangeProto";
|
||||
option java_package = "com.iqser.red.service.redaction.v1.server.data";
|
||||
|
||||
|
||||
|
||||
message Range {
|
||||
// A start index.
|
||||
int32 start = 1;
|
||||
|
||||
// An end index.
|
||||
int32 end = 2;
|
||||
}
|
||||
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Minimum required protoc version
|
||||
MIN_VERSION="28.3"
|
||||
|
||||
# Get the installed protoc version
|
||||
INSTALLED_VERSION=$(protoc --version | awk '{print $2}')
|
||||
|
||||
# Function to compare versions
|
||||
# Succeeds (exit status 0) when version $1 is strictly lower than version $2.
# Versions are ordered with `sort -V`; equal versions are not "less than".
# The previous check was inverted: it succeeded when $1 sorted AFTER $2, so a
# protoc newer than the minimum was rejected and an older one was accepted.
version_lt() {
    [ "$1" != "$2" ] && [ "$(printf '%s\n' "$1" "$2" | sort -V | head -n1)" = "$1" ]
}
|
||||
|
||||
# Check if protoc is installed and meets the minimum version
|
||||
if ! command -v protoc &> /dev/null; then
|
||||
echo "Error: protoc is not installed. Please install version $MIN_VERSION or later."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if version_lt "$INSTALLED_VERSION" "$MIN_VERSION"; then
|
||||
echo "Error: protoc version $INSTALLED_VERSION is too old. Please upgrade to version $MIN_VERSION or later."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Generate Java files from proto files
|
||||
protoc --java_out=../java ./*.proto
|
||||
@ -0,0 +1,33 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto;
|
||||
|
||||
public class LayoutEngineMappingTest {
|
||||
|
||||
@Test
|
||||
public void assertAllValuesMatch() {
|
||||
|
||||
for (LayoutEngine value : LayoutEngine.values()) {
|
||||
var engine = LayoutEngineProto.LayoutEngine.valueOf(value.name());
|
||||
assertEquals(engine.name(), value.name());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void assertAllValuesMatchReverse() {
|
||||
|
||||
for (LayoutEngineProto.LayoutEngine value : LayoutEngineProto.LayoutEngine.values()) {
|
||||
if (value.equals(LayoutEngineProto.LayoutEngine.UNRECOGNIZED)) {
|
||||
continue;
|
||||
}
|
||||
var engine = LayoutEngine.valueOf(value.name());
|
||||
assertEquals(engine.name(), value.name());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,33 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.data.NodeTypeProto;
|
||||
|
||||
public class NodeTypeMappingTest {
|
||||
|
||||
@Test
|
||||
public void assertAllValuesMatch() {
|
||||
|
||||
for (NodeType value : NodeType.values()) {
|
||||
var engine = NodeTypeProto.NodeType.valueOf(value.name());
|
||||
assertEquals(engine.name(), value.name());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void assertAllValuesMatchReverse() {
|
||||
|
||||
for (NodeTypeProto.NodeType value : NodeTypeProto.NodeType.values()) {
|
||||
if (value.equals(NodeTypeProto.NodeType.UNRECOGNIZED)) {
|
||||
continue;
|
||||
}
|
||||
var engine = NodeType.valueOf(value.name());
|
||||
assertEquals(engine.name(), value.name());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,144 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class SectionIdentifierTest {
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("1.1.2: Headline");
|
||||
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
|
||||
assertEquals(3, identifier.level());
|
||||
assertEquals(List.of(1, 1, 2), identifier.getIdentifiers());
|
||||
|
||||
SectionIdentifier child = SectionIdentifier.asChildOf(identifier);
|
||||
assertTrue(child.isChildOf(identifier));
|
||||
|
||||
SectionIdentifier parent = SectionIdentifier.fromSearchText("1.1: Headline");
|
||||
assertTrue(parent.isParentOf(identifier));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier2() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("A.1.2: Headline");
|
||||
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
|
||||
assertEquals(3, identifier.level());
|
||||
assertEquals(List.of(1, 1, 2), identifier.getIdentifiers());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier3() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("D.1.2: Headline");
|
||||
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
|
||||
assertEquals(3, identifier.level());
|
||||
assertEquals(List.of(4, 1, 2), identifier.getIdentifiers());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier4() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("4.1.2.4: Headline");
|
||||
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
|
||||
assertEquals(4, identifier.level());
|
||||
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier5() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("D.1.2.4.5: Headline");
|
||||
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
|
||||
assertEquals(4, identifier.level());
|
||||
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier6() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("d.1.2.4.5: Headline");
|
||||
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
|
||||
assertEquals(4, identifier.level());
|
||||
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSectionIdentifier7() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("4.1.2.4.5: Headline");
|
||||
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
|
||||
assertEquals(4, identifier.level());
|
||||
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testFalsePositive111() {
|
||||
|
||||
SectionIdentifier identifier = SectionIdentifier.fromSearchText("111: Headline");
|
||||
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
|
||||
assertEquals(1, identifier.level());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testParentOf() {
|
||||
|
||||
var headline = SectionIdentifier.fromSearchText("1 Did you ever hear the tragedy of Darth Plagueis The Wise?");
|
||||
var headline1 = SectionIdentifier.fromSearchText("1.0 I thought not. It’s not a story the Jedi would tell you.");
|
||||
var headline2 = SectionIdentifier.fromSearchText("1.1 It’s a Sith legend. Darth Plagueis was a Dark Lord of the Sith, ");
|
||||
var headline3 = SectionIdentifier.fromSearchText("1.2.3 so powerful and so wise he could use the Force to influence the midichlorians to create life…");
|
||||
var headline4 = SectionIdentifier.fromSearchText("1.2.3.4 He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.");
|
||||
var headline5 = SectionIdentifier.fromSearchText("1.2.3.4.5 The dark side of the Force is a pathway to many abilities some consider to be unnatural.");
|
||||
var headline6 = SectionIdentifier.fromSearchText("2.0 He became so powerful…");
|
||||
var headline7 = SectionIdentifier.fromSearchText("10000.0 the only thing he was afraid of was losing his power,");
|
||||
var headline8 = SectionIdentifier.fromSearchText("A.0 which eventually, of course, he did.");
|
||||
var headline9 = SectionIdentifier.fromSearchText("Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.");
|
||||
var headline10 = SectionIdentifier.fromSearchText("2.1.2 Ironic.");
|
||||
var headline11 = SectionIdentifier.fromSearchText("2.He could save others from death,");
|
||||
var headline12 = SectionIdentifier.fromSearchText(" 2. but not himself.");
|
||||
|
||||
var paragraph1 = SectionIdentifier.asChildOf(headline);
|
||||
assertTrue(paragraph1.isChildOf(headline));
|
||||
assertTrue(headline.isParentOf(paragraph1));
|
||||
assertFalse(paragraph1.isParentOf(headline));
|
||||
|
||||
assertFalse(headline.isParentOf(headline1));
|
||||
assertTrue(headline.isParentOf(headline2));
|
||||
assertTrue(headline.isParentOf(headline3));
|
||||
assertTrue(headline.isParentOf(headline4));
|
||||
assertTrue(headline.isParentOf(headline5));
|
||||
assertTrue(headline1.isParentOf(headline2));
|
||||
assertFalse(headline1.isParentOf(headline1));
|
||||
assertTrue(headline3.isParentOf(headline4));
|
||||
assertFalse(headline4.isParentOf(headline5));
|
||||
assertFalse(headline2.isParentOf(headline3));
|
||||
assertFalse(headline2.isParentOf(headline4));
|
||||
assertTrue(headline1.isParentOf(headline3));
|
||||
assertTrue(headline1.isParentOf(headline4));
|
||||
assertFalse(headline1.isParentOf(headline6));
|
||||
assertFalse(headline1.isParentOf(headline7));
|
||||
assertFalse(headline8.isParentOf(headline1));
|
||||
assertFalse(headline8.isParentOf(headline2));
|
||||
assertFalse(headline8.isParentOf(headline3));
|
||||
assertFalse(headline8.isParentOf(headline4));
|
||||
assertFalse(headline9.isParentOf(headline9));
|
||||
assertTrue(headline10.isChildOf(headline11));
|
||||
assertTrue(headline10.isChildOf(headline12));
|
||||
}
|
||||
|
||||
}
|
||||
@ -4,7 +4,7 @@ plugins {
|
||||
}
|
||||
|
||||
description = "redaction-service-api-v1"
|
||||
val persistenceServiceVersion = "2.439.0"
|
||||
val persistenceServiceVersion = "2.631.0"
|
||||
|
||||
dependencies {
|
||||
implementation("org.springframework:spring-web:6.0.12")
|
||||
|
||||
@ -2,12 +2,18 @@ package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public class QueueNames {
|
||||
|
||||
public static final String REDACTION_QUEUE = "redactionQueue";
|
||||
public static final String REDACTION_PRIORITY_QUEUE = "redactionPriorityQueue";
|
||||
public static final String REDACTION_ANALYSIS_RESPONSE_QUEUE = "redactionAnalysisResponseQueue";
|
||||
public static final String REDACTION_DQL = "redactionDQL";
|
||||
public static final String REDACTION_REQUEST_QUEUE_PREFIX = "redaction_request";
|
||||
public static final String REDACTION_REQUEST_EXCHANGE = "redaction_request_exchange";
|
||||
public static final String REDACTION_PRIORITY_REQUEST_QUEUE_PREFIX = "redaction_priority_request";
|
||||
public static final String REDACTION_PRIORITY_REQUEST_EXCHANGE = "redaction_priority_request_exchange";
|
||||
public static final String REDACTION_RESPONSE_EXCHANGE = "redaction_response_exchange";
|
||||
public static final String REDACTION_DLQ = "redaction_error";
|
||||
|
||||
public static final String MIGRATION_QUEUE = "migrationQueue";
|
||||
public static final String SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE = "search_bulk_local_term_response_exchange";
|
||||
public static final String SEARCH_BULK_LOCAL_TERM_DLQ = "search_bulk_local_term_error";
|
||||
|
||||
|
||||
public static final String MIGRATION_REQUEST_QUEUE = "migrationQueue";
|
||||
public static final String MIGRATION_RESPONSE_QUEUE = "migrationResponseQueue";
|
||||
public static final String MIGRATION_DLQ = "migrationDLQ";
|
||||
|
||||
|
||||
@ -12,14 +12,16 @@ plugins {
|
||||
description = "redaction-service-server-v1"
|
||||
|
||||
|
||||
val layoutParserVersion = "0.141.0"
|
||||
val layoutParserVersion = "0.193.0"
|
||||
val jacksonVersion = "2.15.2"
|
||||
val droolsVersion = "9.44.0.Final"
|
||||
val pdfBoxVersion = "3.0.0"
|
||||
val persistenceServiceVersion = "2.444.0"
|
||||
val persistenceServiceVersion = "2.641.0"
|
||||
val llmServiceVersion = "1.20.0-RED10072.2"
|
||||
val springBootStarterVersion = "3.1.5"
|
||||
val springCloudVersion = "4.0.4"
|
||||
val testContainersVersion = "1.19.7"
|
||||
val tomcatVersion = "10.1.18"
|
||||
|
||||
configurations {
|
||||
all {
|
||||
@ -32,21 +34,31 @@ configurations {
|
||||
dependencies {
|
||||
|
||||
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
|
||||
implementation(project(":document"))
|
||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") }
|
||||
implementation("com.iqser.red.service:persistence-service-shared-mongo-v1:${persistenceServiceVersion}")
|
||||
{
|
||||
exclude(group = "com.knecon.fforesight", module = "tenant-commons")
|
||||
}
|
||||
implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")
|
||||
|
||||
implementation("com.knecon.fforesight:llm-service-api:${llmServiceVersion}")
|
||||
implementation("com.iqser.red.commons:spring-commons:6.2.0")
|
||||
implementation("com.iqser.red.commons:metric-commons:2.3.0")
|
||||
|
||||
implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.45.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.24.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.50.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.31.0")
|
||||
implementation("com.knecon.fforesight:keycloak-commons:0.30.0") {
|
||||
exclude(group = "com.knecon.fforesight", module = "tenant-commons")
|
||||
}
|
||||
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
|
||||
implementation("com.knecon.fforesight:lifecycle-commons:0.7.0")
|
||||
|
||||
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
|
||||
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
|
||||
implementation("org.ahocorasick:ahocorasick:0.6.3")
|
||||
implementation("org.ahocorasick:ahocorasick:0.9.0")
|
||||
implementation("com.hankcs:aho-corasick-double-array-trie:1.2.2")
|
||||
implementation("com.github.roklenarcic:aho-corasick:1.2")
|
||||
implementation("org.javassist:javassist:3.29.2-GA")
|
||||
|
||||
implementation("org.drools:drools-engine:${droolsVersion}")
|
||||
@ -60,8 +72,16 @@ dependencies {
|
||||
implementation("org.springframework.boot:spring-boot-starter-cache:${springBootStarterVersion}")
|
||||
implementation("org.springframework.boot:spring-boot-starter-data-redis:${springBootStarterVersion}")
|
||||
|
||||
implementation("org.springframework.boot:spring-boot-starter-websocket:${springBootStarterVersion}")
|
||||
implementation("org.springframework.security:spring-security-messaging:6.1.3")
|
||||
implementation("org.apache.tomcat:tomcat-websocket:${tomcatVersion}")
|
||||
implementation("org.apache.tomcat.embed:tomcat-embed-core:${tomcatVersion}")
|
||||
|
||||
implementation("org.liquibase:liquibase-core:4.29.2") // Needed to be set explicit, otherwise spring dependency management sets it to 4.20.0
|
||||
implementation("org.liquibase.ext:liquibase-mongodb:4.29.2")
|
||||
|
||||
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
|
||||
implementation("ch.qos.logback:logback-classic")
|
||||
api("ch.qos.logback:logback-classic")
|
||||
|
||||
implementation("org.reflections:reflections:0.10.2")
|
||||
|
||||
@ -82,7 +102,12 @@ dependencies {
|
||||
group = "com.iqser.red.service",
|
||||
module = "persistence-service-shared-api-v1"
|
||||
)
|
||||
exclude(
|
||||
group = "com.knecon.fforesight",
|
||||
module = "document"
|
||||
)
|
||||
}
|
||||
testImplementation("com.pdftron:PDFNet:10.11.0")
|
||||
}
|
||||
|
||||
dependencyManagement {
|
||||
@ -107,6 +132,7 @@ tasks.named<BootBuildImage>("bootBuildImage") {
|
||||
"BPE_APPEND_JAVA_TOOL_OPTIONS",
|
||||
"-XX:MaxMetaspaceSize=1g -Dfile.encoding=UTF-8 -Dkie.repository.project.cache.size=50 -Dkie.repository.project.versions.cache.size=5"
|
||||
)
|
||||
environment.put("BPE_DEFAULT_LANG", "en_US.utf8") // java.text.Normalizer does not care for file.encoding
|
||||
|
||||
imageName.set("nexus.knecon.com:5001/red/${project.name}")// must build image with same name always, otherwise the builder will not know which image to use as cache. DO NOT CHANGE!
|
||||
if (project.hasProperty("buildbootDockerHostNetwork")) {
|
||||
@ -130,18 +156,19 @@ tasks.named<BootBuildImage>("bootBuildImage") {
|
||||
}
|
||||
}
|
||||
|
||||
fun parseDroolsImports(droolsFilePath: String): List<String> {
|
||||
|
||||
fun parseDroolsImports(vararg droolsFilePaths: String): List<String> {
|
||||
val imports = mutableListOf<String>()
|
||||
val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);")
|
||||
val desiredPrefix = "com.iqser.red.service.redaction.v1"
|
||||
|
||||
File(droolsFilePath).forEachLine { line ->
|
||||
importPattern.find(line)?.let { matchResult ->
|
||||
val importPath = matchResult.groupValues[1].trim()
|
||||
if (importPath.startsWith(desiredPrefix)) {
|
||||
val formattedPath = importPath.replace('.', '/')
|
||||
imports.add("$formattedPath.java")
|
||||
droolsFilePaths.forEach { filePath ->
|
||||
File(filePath).forEachLine { line ->
|
||||
importPattern.find(line)?.let { matchResult ->
|
||||
val importPath = matchResult.groupValues[1].trim()
|
||||
if (importPath.startsWith(desiredPrefix)) {
|
||||
val formattedPath = importPath.replace('.', '/')
|
||||
imports.add("$formattedPath.java")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -149,22 +176,30 @@ fun parseDroolsImports(droolsFilePath: String): List<String> {
|
||||
return imports
|
||||
}
|
||||
|
||||
val droolsImports = parseDroolsImports("redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl")
|
||||
// Combine imports from both drools files
|
||||
val droolsImports = parseDroolsImports(
|
||||
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl",
|
||||
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_component_rules.drl"
|
||||
)
|
||||
|
||||
tasks.register("generateJavaDoc", Javadoc::class) {
|
||||
|
||||
dependsOn("compileJava")
|
||||
dependsOn("delombok")
|
||||
classpath = project.sourceSets["main"].runtimeClasspath
|
||||
source = fileTree("${buildDir}/generated/sources/delombok/java/main") {
|
||||
val documentFiles = fileTree("${project(":document").layout.buildDirectory.get()}/generated/sources/delombok/java/main") {
|
||||
include(droolsImports)
|
||||
}
|
||||
destinationDir = file(project.findProperty("javadocDestinationDir")?.toString() ?: "")
|
||||
val mainFiles = fileTree("${layout.buildDirectory.get()}/generated/sources/delombok/java/main") {
|
||||
include(droolsImports)
|
||||
}
|
||||
source = documentFiles + mainFiles
|
||||
|
||||
setDestinationDir(file(project.findProperty("javadocDestinationDir")?.toString() ?: ""))
|
||||
|
||||
options.memberLevel = JavadocMemberLevel.PUBLIC
|
||||
(options as StandardJavadocDocletOptions).apply {
|
||||
header = "Redaction Service ${project.version}"
|
||||
footer = "Redaction Service ${project.version}"
|
||||
title = "API Documentation for Redaction Service ${project.version}"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -9,10 +9,12 @@ import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.liquibase.LiquibaseAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.mongo.MongoAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.cache.annotation.EnableCaching;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.EnableAspectJAutoProxy;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
|
||||
|
||||
@ -20,6 +22,8 @@ import com.iqser.red.service.dictionarymerge.commons.DictionaryMergeService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.knecon.fforesight.keycloakcommons.DefaultKeyCloakCommonsAutoConfiguration;
|
||||
import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
|
||||
import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration;
|
||||
import com.knecon.fforesight.mongo.database.commons.liquibase.EnableMongoLiquibase;
|
||||
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
|
||||
@ -32,13 +36,14 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@EnableCaching
|
||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class})
|
||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class, DefaultKeyCloakCommonsAutoConfiguration.class, LifecycleAutoconfiguration.class})
|
||||
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class})
|
||||
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
||||
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
||||
@EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence")
|
||||
@EnableMongoLiquibase
|
||||
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class})
|
||||
@EnableAspectJAutoProxy
|
||||
public class Application {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
@ -10,11 +10,13 @@ import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.reflections.Reflections;
|
||||
import org.reflections.scanners.Scanners;
|
||||
import org.reflections.util.ConfigurationBuilder;
|
||||
import org.reflections.util.FilterBuilder;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
|
||||
|
||||
@ -25,6 +27,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class DeprecatedElementsFinder {
|
||||
|
||||
public static final String PACKAGE_NAME = "com.iqser.red.service.redaction.v1.server";
|
||||
public static final Pattern DATA_PACKAGE = Pattern.compile(".*/data/.*");
|
||||
|
||||
private Set<Method> deprecatedMethods;
|
||||
@Getter
|
||||
private Map<String, String> deprecatedMethodsSignaturesMap;
|
||||
@ -43,7 +47,10 @@ public class DeprecatedElementsFinder {
|
||||
|
||||
Reflections reflections = new Reflections(new ConfigurationBuilder().forPackage(PACKAGE_NAME)
|
||||
.setExpandSuperTypes(true)
|
||||
.setScanners(Scanners.MethodsAnnotated, Scanners.TypesAnnotated, Scanners.SubTypes));
|
||||
.setScanners(Scanners.MethodsAnnotated, Scanners.TypesAnnotated, Scanners.SubTypes)
|
||||
.filterInputsBy(new FilterBuilder().includePackage(PACKAGE_NAME).excludePackage(PACKAGE_NAME + ".data")
|
||||
// Exclude the generated proto data package
|
||||
));
|
||||
|
||||
deprecatedMethods = reflections.get(Scanners.MethodsAnnotated.with(Deprecated.class).as(Method.class));
|
||||
|
||||
|
||||
@ -22,18 +22,28 @@ public class RedactionServiceSettings {
|
||||
|
||||
private boolean nerServiceEnabled = true;
|
||||
|
||||
private boolean azureNerServiceEnabled;
|
||||
|
||||
private boolean llmNerServiceEnabled;
|
||||
|
||||
private boolean priorityMode;
|
||||
|
||||
private long firstLevelDictionaryCacheMaximumSize = 1000;
|
||||
|
||||
private long dictionaryCacheMaximumSize = 100;
|
||||
|
||||
private int dictionaryCacheExpireAfterAccessDays = 3;
|
||||
|
||||
private int droolsExecutionTimeoutSecs = 300;
|
||||
private int droolsExecutionTimeoutSecs = 600;
|
||||
|
||||
private boolean ruleExecutionSecured = true;
|
||||
|
||||
private boolean annotationMode;
|
||||
|
||||
private boolean droolsDebug;
|
||||
|
||||
private boolean protobufJsonFallback = true;
|
||||
|
||||
|
||||
public int getDroolsExecutionTimeoutSecs(int numberOfPages) {
|
||||
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client;
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.internal.resources.DateFormatsResource;
|
||||
|
||||
@FeignClient(name = "DateFormatsResource", url = "${persistence-service.url}")
|
||||
public interface DateFormatsClient extends DateFormatsResource {
|
||||
|
||||
}
|
||||
@ -15,5 +15,6 @@ public class EntityRecognitionEntity {
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
private String type;
|
||||
private Double confidence;
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,42 @@
|
||||
package com.iqser.red.service.redaction.v1.server.config;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.data.redis.connection.RedisConnectionFactory;
|
||||
import org.springframework.data.redis.listener.PatternTopic;
|
||||
import org.springframework.data.redis.listener.RedisMessageListenerContainer;
|
||||
import org.springframework.data.redis.listener.adapter.MessageListenerAdapter;
|
||||
import org.springframework.messaging.simp.SimpMessagingTemplate;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.service.websocket.RedisPubsubReceiver;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Configuration
|
||||
@RequiredArgsConstructor
|
||||
public class RedisPubsubConfiguration {
|
||||
|
||||
private final SimpMessagingTemplate template;
|
||||
private final ObjectMapper mapper;
|
||||
private final RedisConnectionFactory connectionFactory;
|
||||
|
||||
@Bean
|
||||
public RedisPubsubReceiver redisPubsubReceiver() {
|
||||
return new RedisPubsubReceiver(template, mapper);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public MessageListenerAdapter redisPubsubListenerAdapter() {
|
||||
return new MessageListenerAdapter(redisPubsubReceiver(), "receiveMessage");
|
||||
}
|
||||
|
||||
@Bean
|
||||
public RedisMessageListenerContainer redisPubsubContainer() {
|
||||
RedisMessageListenerContainer container = new RedisMessageListenerContainer();
|
||||
container.setConnectionFactory(connectionFactory);
|
||||
container.addMessageListener(redisPubsubListenerAdapter(), new PatternTopic("redaction-service-websocket-messages"));
|
||||
return container;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,95 @@
|
||||
package com.iqser.red.service.redaction.v1.server.config;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.tomcat.websocket.server.WsSci;
|
||||
import org.springframework.boot.web.embedded.tomcat.TomcatContextCustomizer;
|
||||
import org.springframework.boot.web.embedded.tomcat.TomcatServletWebServerFactory;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.messaging.Message;
|
||||
import org.springframework.messaging.MessageChannel;
|
||||
import org.springframework.messaging.simp.config.ChannelRegistration;
|
||||
import org.springframework.messaging.simp.config.MessageBrokerRegistry;
|
||||
import org.springframework.messaging.simp.stomp.StompCommand;
|
||||
import org.springframework.messaging.simp.stomp.StompHeaderAccessor;
|
||||
import org.springframework.messaging.support.ChannelInterceptor;
|
||||
import org.springframework.messaging.support.MessageHeaderAccessor;
|
||||
import org.springframework.security.authentication.AuthenticationManager;
|
||||
import org.springframework.security.oauth2.server.resource.authentication.BearerTokenAuthenticationToken;
|
||||
import org.springframework.security.oauth2.server.resource.authentication.JwtAuthenticationToken;
|
||||
import org.springframework.web.socket.config.annotation.EnableWebSocketMessageBroker;
|
||||
import org.springframework.web.socket.config.annotation.StompEndpointRegistry;
|
||||
import org.springframework.web.socket.config.annotation.WebSocketMessageBrokerConfigurer;
|
||||
|
||||
import com.knecon.fforesight.keycloakcommons.security.TenantAuthenticationManagerResolver;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Configuration
|
||||
@EnableWebSocketMessageBroker
|
||||
@RequiredArgsConstructor
|
||||
public class WebSocketConfiguration implements WebSocketMessageBrokerConfigurer {
|
||||
|
||||
private final TenantAuthenticationManagerResolver tenantAuthenticationManagerResolver;
|
||||
|
||||
|
||||
@Override
|
||||
public void configureMessageBroker(MessageBrokerRegistry config) {
|
||||
|
||||
config.enableSimpleBroker("/topic");
|
||||
config.setApplicationDestinationPrefixes("/app");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void registerStompEndpoints(StompEndpointRegistry registry) {
|
||||
|
||||
registry.addEndpoint("/api/rules-logging/rulesocket").setAllowedOrigins("*");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void configureClientInboundChannel(ChannelRegistration registration) {
|
||||
|
||||
// https://docs.spring.io/spring-framework/reference/web/websocket/stomp/authentication-token-based.html
|
||||
registration.interceptors(new ChannelInterceptor() {
|
||||
@Override
|
||||
public Message<?> preSend(Message<?> message, MessageChannel channel) {
|
||||
|
||||
StompHeaderAccessor accessor = MessageHeaderAccessor.getAccessor(message, StompHeaderAccessor.class);
|
||||
if (StompCommand.CONNECT.equals(accessor.getCommand())) {
|
||||
Optional.ofNullable(accessor.getNativeHeader("Authorization"))
|
||||
.ifPresent(ah -> {
|
||||
String bearerToken = ah.get(0).replace("Bearer ", "");
|
||||
log.info("Received bearer token {}", bearerToken);
|
||||
AuthenticationManager authenticationManager = tenantAuthenticationManagerResolver.resolve(bearerToken);
|
||||
JwtAuthenticationToken token = (JwtAuthenticationToken) authenticationManager.authenticate(new BearerTokenAuthenticationToken(bearerToken));
|
||||
accessor.setUser(token);
|
||||
});
|
||||
}
|
||||
return message;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public TomcatServletWebServerFactory tomcatContainerFactory() {
|
||||
|
||||
TomcatServletWebServerFactory factory = new TomcatServletWebServerFactory();
|
||||
factory.setTomcatContextCustomizers(Collections.singletonList(tomcatContextCustomizer()));
|
||||
return factory;
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public TomcatContextCustomizer tomcatContextCustomizer() {
|
||||
|
||||
return context -> context.addServletContainerInitializer(new WsSci(), null);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,88 @@
|
||||
package com.iqser.red.service.redaction.v1.server.config;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.messaging.Message;
|
||||
import org.springframework.messaging.simp.SimpMessageType;
|
||||
import org.springframework.messaging.simp.stomp.StompHeaderAccessor;
|
||||
import org.springframework.security.config.annotation.web.messaging.MessageSecurityMetadataSourceRegistry;
|
||||
import org.springframework.security.config.annotation.web.socket.AbstractSecurityWebSocketMessageBrokerConfigurer;
|
||||
import org.springframework.security.oauth2.server.resource.authentication.JwtAuthenticationToken;
|
||||
|
||||
import com.knecon.fforesight.keycloakcommons.security.TokenUtils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Configuration
|
||||
public class WebSocketSecurityConfiguration extends AbstractSecurityWebSocketMessageBrokerConfigurer {
|
||||
|
||||
@Value("${cors.enabled:false}")
|
||||
private boolean corsEnabled;
|
||||
|
||||
|
||||
@Override
|
||||
protected void configureInbound(MessageSecurityMetadataSourceRegistry messages) {
|
||||
|
||||
messages.simpTypeMatchers(SimpMessageType.HEARTBEAT, SimpMessageType.UNSUBSCRIBE, SimpMessageType.DISCONNECT)
|
||||
.permitAll()
|
||||
.simpTypeMatchers(SimpMessageType.CONNECT)
|
||||
.anonymous() // this is intended, see WebSocketConfiguration.configureClientInboundChannel
|
||||
.simpTypeMatchers(SimpMessageType.SUBSCRIBE)
|
||||
.access("@tenantWebSocketSecurityMatcher.checkCanSubscribeTo(authentication,message)")
|
||||
.anyMessage()
|
||||
.denyAll();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected boolean sameOriginDisabled() {
|
||||
|
||||
return corsEnabled;
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public TenantWebSocketSecurityMatcher tenantWebSocketSecurityMatcher() {
|
||||
|
||||
return new TenantWebSocketSecurityMatcher();
|
||||
}
|
||||
|
||||
|
||||
public class TenantWebSocketSecurityMatcher {
|
||||
|
||||
public boolean checkCanSubscribeTo(JwtAuthenticationToken authentication, Message<?> message) {
|
||||
|
||||
var targetedTenant = extractTenantId(message);
|
||||
var currentTenant = getCurrentTenant(authentication);
|
||||
return targetedTenant.isPresent() && currentTenant.isPresent() && currentTenant.get().equals(targetedTenant.get());
|
||||
}
|
||||
|
||||
|
||||
private Optional<String> getCurrentTenant(JwtAuthenticationToken authentication) {
|
||||
|
||||
if (authentication != null && authentication.getToken() != null && authentication.getToken().getTokenValue() != null) {
|
||||
return Optional.of(TokenUtils.toTenant(authentication.getToken().getTokenValue()));
|
||||
} else {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Optional<String> extractTenantId(Message<?> message) {
|
||||
|
||||
StompHeaderAccessor sha = StompHeaderAccessor.wrap(message);
|
||||
String topic = sha.getDestination();
|
||||
if (topic == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
String tenant = topic.split("/")[2];
|
||||
return Optional.of(tenant);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,36 @@
|
||||
package com.iqser.red.service.redaction.v1.server.logger;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Getter
|
||||
@ToString
|
||||
public final class Context {
|
||||
|
||||
private String fileId;
|
||||
private String dossierId;
|
||||
private String dossierTemplateId;
|
||||
@Setter
|
||||
private long ruleVersion;
|
||||
@Setter
|
||||
private long dateFormatsVersion;
|
||||
private int analysisNumber;
|
||||
private String tenantId;
|
||||
|
||||
|
||||
public Context(String fileId, String dossierId, String dossierTemplateId, long ruleVersion, int analysisNumber, String tenantId) {
|
||||
|
||||
this.fileId = fileId;
|
||||
this.dossierId = dossierId;
|
||||
this.dossierTemplateId = dossierTemplateId;
|
||||
this.ruleVersion = ruleVersion;
|
||||
this.analysisNumber = analysisNumber;
|
||||
this.tenantId = tenantId;
|
||||
}
|
||||
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user