Compare commits

..

164 Commits

Author SHA1 Message Date
Dominique Eifländer
fdd2b954fe Merge branch 'OPS-284' into 'master'
OPS-284: add prometheus endpoint

Closes OPS-284

See merge request redactmanager/redaction-service!582
2025-01-24 11:00:50 +01:00
Christoph Schabert
2d3a048487 OPS-284: add prometheus endpoint 2025-01-23 13:39:25 +01:00
Maverick Studer
518c38c2e9 Merge branch 'RED-10687' into 'master'
RED-10687: Filter out overlapping recommendations if they are of the same type

Closes RED-10687

See merge request redactmanager/redaction-service!580
2025-01-08 13:19:37 +01:00
maverickstuder
21097a6419 RED-10687: Filter out overlapping recommendations if they are of the same type 2025-01-08 11:34:38 +01:00
Maverick Studer
c8dd167606 Merge branch 'RED-10633' into 'master'
RED-10633: Duplicated values when extracting from table in DM 1.3.0

Closes RED-10633

See merge request redactmanager/redaction-service!579
2024-12-13 09:24:10 +01:00
maverickstuder
9bd5577986 RED-10633: Duplicated values when extracting from table in DM 1.3.0 2024-12-12 13:23:17 +01:00
Maverick Studer
c1990ef4aa Merge branch 'RED-10639-fp' into 'master'
RED-10639: RM-224: ERROR state for document after re-upload same document with...

Closes RED-10639

See merge request redactmanager/redaction-service!577
2024-12-12 12:57:47 +01:00
Maverick Studer
3dfa05bd67 RED-10639: RM-224: ERROR state for document after re-upload same document with... 2024-12-12 12:57:46 +01:00
Dominique Eifländer
22b2a6474b Merge branch 'RED-10644-master' into 'master'
RED-10644: Fixed dublicated entries with whitespace at the end

Closes RED-10644

See merge request redactmanager/redaction-service!573
2024-12-10 12:41:36 +01:00
Dominique Eifländer
cf21b75f2e RED-10644: Fixed dublicated entries with whitespace at the end 2024-12-10 12:41:36 +01:00
Maverick Studer
a1e6361c3e Merge branch 'feature/RED-10200' into 'master'
RED-10200: Spike performant rules update logic

Closes RED-10200

See merge request redactmanager/redaction-service!572
2024-12-04 14:41:24 +01:00
Maverick Studer
3c2db795c8 RED-10200: Spike performant rules update logic 2024-12-04 14:41:24 +01:00
Dominique Eifländer
ef1810b658 Merge branch 'RED-10526' into 'master'
RED-10526: Set liquibase to 4.29.2 as 4.30.0 is 3 times slower

Closes RED-10526

See merge request redactmanager/redaction-service!571
2024-12-02 11:27:50 +01:00
Dominique Eifländer
26025a5621 RED-10526: Set liquibase to 4.29.2 as 4.30.0 is 3 times slower 2024-12-02 11:27:50 +01:00
Dominique Eifländer
4fa91a59e0 Merge branch 'RED-10526' into 'master'
RED-10526: Upgrade liquibase to 4.30.0

Closes RED-10526

See merge request redactmanager/redaction-service!569
2024-11-27 11:26:32 +01:00
Dominique Eifländer
7c37776af4 RED-10526: Upgrade liquibase to 4.30.0 2024-11-27 11:13:27 +01:00
Corina Olariu
9000f755a3 Merge branch 'RED-3300' into 'master'
RED-3300 Improve impurity rule

Closes RED-3300

See merge request redactmanager/redaction-service!567
2024-11-26 13:10:14 +01:00
Corina Olariu
62ec63cc55 RED-3300 Improve impurity rule 2024-11-26 13:10:14 +01:00
Corina Olariu
db59ae014b Merge branch 'RED-10046' into 'master'
RED-10046 The punctuation mark “.” should be treated as a word boundary when...

Closes RED-10046

See merge request redactmanager/redaction-service!565
2024-11-21 10:20:44 +01:00
Corina Olariu
dfd262e9e1 RED-10046 The punctuation mark “.” should be treated as a word boundary when... 2024-11-21 10:20:44 +01:00
Maverick Studer
4fd36768b2 Merge branch 'hotfix-persistence-dependency' into 'master'
persistence dependency update

See merge request redactmanager/redaction-service!566
2024-11-20 11:34:08 +01:00
Maverick Studer
e04c6dadd7 persistence dependency update 2024-11-20 11:34:07 +01:00
Maverick Studer
213d3bf645 Merge branch 'feature/RED-10115' into 'master'
RED-10115: Refactoring of justifications

Closes RED-10115

See merge request redactmanager/redaction-service!559
2024-11-20 10:53:47 +01:00
Maverick Studer
66f3f6ce59 RED-10115: Refactoring of justifications 2024-11-20 10:53:47 +01:00
Dominique Eifländer
3f606ad567 Merge branch 'RED-10456-master' into 'master'
RED-10456: Enabled to remove imported redactions

Closes RED-10456

See merge request redactmanager/redaction-service!563
2024-11-18 13:18:42 +01:00
Dominique Eifländer
7b1c6beb11 RED-10456: Enabled to remove imported redactions 2024-11-18 12:51:33 +01:00
Kilian Schuettler
e660184646 RED-9139-pageOrderHotfix 2024-11-15 16:33:44 +01:00
Kilian Schuettler
f1f2d02266 RED-9139-pageOrderHotfix 2024-11-15 16:28:13 +01:00
Kilian Schüttler
947cbe4cd2 Merge branch 'RED-9139' into 'master'
RED-9139: refactor some code in DocumentGraphMapper

Closes RED-9139

See merge request redactmanager/redaction-service!560
2024-11-15 15:37:49 +01:00
Kilian Schuettler
e8dc37374e RED-9139: refactor some code in DocumentGraphMapper 2024-11-15 15:25:32 +01:00
Kilian Schuettler
8769922bf2 RED-9139: fix Image IDs 2024-11-15 15:15:49 +01:00
Corina Olariu
21f2ded6c6 Merge branch 'RED-10425' into 'master'
RED-10425 Annotation added twice when bulk-force while auto-analysis is disabled

Closes RED-10425

See merge request redactmanager/redaction-service!557
2024-11-15 09:28:13 +01:00
Corina Olariu
9f20a14aec RED-10425 Annotation added twice when bulk-force while auto-analysis is disabled 2024-11-15 09:28:13 +01:00
Maverick Studer
681d6328ef Merge branch 'RED-10471-fp' into 'master'
RED-10471: PII.11.0 does not redact anymore

Closes RED-10471

See merge request redactmanager/redaction-service!556
2024-11-14 18:41:51 +01:00
Maverick Studer
97c23c367e RED-10471: PII.11.0 does not redact anymore 2024-11-14 18:41:50 +01:00
Kilian Schüttler
1b7c59d292 Merge branch 'feature/RED-9139' into 'master'
RED-9139: move document to its own module, add TableOfContents and TableOfContentsItem

Closes RED-9139

See merge request redactmanager/redaction-service!554
2024-11-14 16:50:42 +01:00
Kilian Schüttler
f9d939958f RED-9139: move document to its own module, add TableOfContents and TableOfContentsItem 2024-11-14 16:50:42 +01:00
Maverick Studer
41f824297c Merge branch 'feature/RED-10290' into 'master'
RED-10290: Improve SearchImplementation logic for dictionaries

Closes RED-10290

See merge request redactmanager/redaction-service!553
2024-11-11 12:10:58 +01:00
Maverick Studer
68f75f070c RED-10290: Improve SearchImplementation logic for dictionaries 2024-11-11 12:10:57 +01:00
Dominique Eifländer
4c19be01c6 Merge branch 'RED-10353-master' into 'master'
RED-10353: Fixed missing errorCode when rules are locked

Closes RED-10353

See merge request redactmanager/redaction-service!552
2024-11-11 11:43:48 +01:00
Dominique Eifländer
98ba463639 RED-10353: Fixed missing errorCode when rules are locked 2024-11-08 12:39:41 +01:00
Maverick Studer
e415234bf8 Merge branch 'feature/RED-10072' into 'master'
RED-10072: AI description field and toggle for entities

Closes RED-10072

See merge request redactmanager/redaction-service!539
2024-11-07 14:43:51 +01:00
Maverick Studer
7f96c7b51e RED-10072: AI description field and toggle for entities 2024-11-07 14:43:51 +01:00
Dominique Eifländer
a0d3c4cf86 Merge branch 'RED-10353-master' into 'master'
RED-10353: Added error code for file that causes the timeout

Closes RED-10353

See merge request redactmanager/redaction-service!550
2024-11-06 11:19:29 +01:00
Dominique Eifländer
d2a768d9f5 RED-10353: Added error code for file that causes the timeout 2024-11-06 11:05:40 +01:00
Corina Olariu
16a7a8b9f4 Merge branch 'RED-10186' into 'master'
RED-10186 Unlinked annotation with manual changes still linked and removed in specific corner case

Closes RED-10186

See merge request redactmanager/redaction-service!548
2024-11-04 14:24:19 +01:00
Corina Olariu
288e0d3c51 RED-10186 Unlinked annotation with manual changes still linked and removed in specific corner case 2024-11-04 14:24:18 +01:00
Dominique Eifländer
c9468f3cf4 Merge branch 'RED-10353-master' into 'master'
RED-10353: Increase drools timeout, different error message for the file that caused the timeout

Closes RED-10353

See merge request redactmanager/redaction-service!547
2024-10-31 13:34:53 +01:00
Dominique Eifländer
1abceb2e20 RED-10353: Increase drools timeout, different error message for the file that caused the timeout 2024-10-31 13:20:31 +01:00
Corina Olariu
f8e2aae6e7 Merge branch 'RED-9774' into 'master'
RED-9774 - Update rules

Closes RED-9774

See merge request redactmanager/redaction-service!544
2024-10-25 07:06:44 +02:00
Corina Olariu
569699139f RED-9774 - Update rules 2024-10-25 07:06:44 +02:00
Kilian Schüttler
45cd7f3d98 Merge branch 'feature/RED-10260' into 'master'
RED-10260: add quoteChar to component mapping

Closes RED-10260

See merge request redactmanager/redaction-service!545
2024-10-24 11:00:57 +02:00
Kilian Schüttler
fa3ba58bd3 RED-10260: add quoteChar to component mapping 2024-10-24 11:00:56 +02:00
Kilian Schüttler
69f7d688d0 Merge branch 'RED-10194' into 'master'
RED-10194: add component mapping related classes to javadoc

Closes RED-10194

See merge request redactmanager/redaction-service!543
2024-10-22 22:19:00 +02:00
Kilian Schüttler
76bab106bc RED-10194: add component mapping related classes to javadoc 2024-10-22 22:19:00 +02:00
Kilian Schüttler
62e26add99 Merge branch 'RED-10194' into 'master'
RED-10194: add component mapping related classes to javadoc

Closes RED-10194

See merge request redactmanager/redaction-service!542
2024-10-16 12:04:36 +02:00
Kilian Schuettler
1293484295 RED-10194: add component mapping related classes to javadoc 2024-10-16 11:38:59 +02:00
Kilian Schüttler
de0027dd6d Merge branch 'feature/RED-10125-fp' into 'master'
RED-10125: fix Normalizer in drools

Closes RED-10125

See merge request redactmanager/redaction-service!541
2024-10-15 10:59:35 +02:00
Kilian Schüttler
d73ec58d8a RED-10125: fix Normalizer in drools 2024-10-15 10:59:34 +02:00
Maverick Studer
9e032cf1a8 Merge branch 'RED-10104' into 'master'
RED-10104: Add rectangle functionality to bulk-local/add endpoint

Closes RED-10104

See merge request redactmanager/redaction-service!538
2024-10-14 17:19:41 +02:00
Maverick Studer
c2fd73efe7 RED-10104: Add rectangle functionality to bulk-local/add endpoint 2024-10-14 17:19:41 +02:00
Kilian Schüttler
db405278bc Merge branch 'feature/RM-184' into 'master'
RM-184 && RM-172: update SectionIdentifier javadoc, make RulesLogger and...

Closes RM-184

See merge request redactmanager/redaction-service!535
2024-10-10 10:35:12 +02:00
Kilian Schüttler
11a9f2f8aa RM-184 && RM-172: update SectionIdentifier javadoc, make RulesLogger and... 2024-10-10 10:35:12 +02:00
Maverick Studer
0aea884da2 Merge branch 'RED-9123' into 'master'
RED-9123: Improve performance of re-analysis (Spike)

Closes RED-9123

See merge request redactmanager/redaction-service!450
2024-10-08 09:24:19 +02:00
Maverick Studer
d32c56e101 RED-9123: Improve performance of re-analysis (Spike) 2024-10-08 09:24:18 +02:00
Maverick Studer
92d03de194 Merge branch 'hotfix-npe-component-log-fp' into 'master'
Hotfix for NPE when creating component log

See merge request redactmanager/redaction-service!534
2024-10-07 18:01:16 +02:00
maverickstuder
6a58bf6d44 Hotfix for NPE when creating component log 2024-10-07 16:36:53 +02:00
Dominique Eifländer
daed4e07ef Merge branch 'RED-10131-master' into 'master'
RED-10131: Catch stackoverflow error of unoptimized regexes

Closes RED-10131

See merge request redactmanager/redaction-service!532
2024-10-07 13:16:24 +02:00
Dominique Eifländer
eceba3a37a RED-10131: Catch stackoverflow error of unoptimized regexes 2024-10-07 13:00:00 +02:00
Maverick Studer
4b647b23a3 Merge branch 'RED-10146' into 'master'
RED-10146: Include defined components in component log

Closes RED-10146

See merge request redactmanager/redaction-service!529
2024-10-07 12:26:05 +02:00
Maverick Studer
dd1b7cf72f RED-10146: Include defined components in component log 2024-10-07 12:26:04 +02:00
Maverick Studer
725e6c1e14 Merge branch 'RED-9933' into 'master'
RED-9933: updated date formats

Closes RED-9933

See merge request redactmanager/redaction-service!527
2024-09-30 11:33:03 +02:00
maverickstuder
ab49e5d296 RED-9933: updated date formats 2024-09-30 09:55:24 +02:00
Maverick Studer
48d14cff8f Merge branch 'RED-9947-fix2' into 'master'
RED-9947 && RED-10104

See merge request redactmanager/redaction-service!526
2024-09-27 11:05:26 +02:00
Maverick Studer
dbe8e08bba RED-9947 && RED-10104 2024-09-27 11:05:26 +02:00
Maverick Studer
98456ceb6d Merge branch 'RED-9933' into 'master'
RED-9933: DocuMine DateFormat config in dossier templates

Closes RED-9933

See merge request redactmanager/redaction-service!523
2024-09-25 12:21:39 +02:00
Maverick Studer
a7effce48e RED-9933: DocuMine DateFormat config in dossier templates 2024-09-25 12:21:38 +02:00
Maverick Studer
d068160a9d Merge branch 'RED-9933' into 'master'
RED-9933: DocuMine DateFormat config in dossier templates

Closes RED-9933

See merge request redactmanager/redaction-service!522
2024-09-24 11:37:51 +02:00
Maverick Studer
f37e49e8bb RED-9933: DocuMine DateFormat config in dossier templates 2024-09-24 11:37:50 +02:00
Maverick Studer
1f3cf8d529 Merge branch 'feature/RED-9348' into 'master'
RED-9348: move component log to mongodb

Closes RED-9348

See merge request redactmanager/redaction-service!519
2024-09-23 16:05:03 +02:00
Maverick Studer
b61b89bc5b RED-9348: move component log to mongodb 2024-09-23 16:05:03 +02:00
Maverick Studer
8aa31a18af Merge branch 'feature/RED-9010' into 'master'
RED-9010: remove redaction log

Closes RED-9010

See merge request redactmanager/redaction-service!520
2024-09-19 11:34:34 +02:00
maverickstuder
b2a837bc54 RED-9010: remove redaction log 2024-09-18 12:29:38 +02:00
Maverick Studer
f93e59e29e Merge branch 'RED-9524' into 'master'
RED-9524: File processing does not annotate images

Closes RED-9524

See merge request redactmanager/redaction-service!507
2024-09-18 10:47:10 +02:00
Maverick Studer
b8612d9b95 RED-9524: File processing does not annotate images 2024-09-18 10:47:10 +02:00
Kilian Schüttler
1683745dc7 Merge branch 'hotfix' into 'master'
RED-9975: extend SectionIdentifier to alphanumeric

See merge request redactmanager/redaction-service!515
2024-09-11 13:38:35 +02:00
Kilian Schüttler
ec50eca15b RED-9975: extend SectionIdentifier to alphanumeric 2024-09-11 13:38:35 +02:00
Maverick Studer
3ff541fee6 Merge branch 'RED-9859' into 'master'
RED-9859: Redactions found by et. al. rule not skipped with published information

Closes RED-9859

See merge request redactmanager/redaction-service!512
2024-09-10 14:24:09 +02:00
Maverick Studer
07b9b8bf8a RED-9859: Redactions found by et. al. rule not skipped with published information 2024-09-10 14:24:09 +02:00
Andrei Isvoran
3c165070ee Merge branch 'RED-9986-DM12' into 'master'
RED-9986 - Add component rules path to be scanned for Javadoc generation

Closes RED-9986

See merge request redactmanager/redaction-service!510
2024-09-09 10:13:57 +02:00
Andrei Isvoran
ecd57e17a2 RED-9986 - Add component rules path to be scanned for Javadoc generation 2024-09-09 10:13:57 +02:00
Kilian Schüttler
9fc065518d Merge branch 'RED-9548' into 'master'
RED-9548 - Actions results in wrong results for deadlocked annotation

Closes RED-9548

See merge request redactmanager/redaction-service!509
2024-09-06 14:43:57 +02:00
Corina Olariu
3c3c029cf4 RED-9548 - Actions results in wrong results for deadlocked annotation 2024-09-06 14:43:57 +02:00
Kilian Schüttler
d767966056 Merge branch 'RED-9728' into 'master'
RED-9728: remove False Positives from table methods

Closes RED-9728

See merge request redactmanager/redaction-service!508
2024-09-06 11:07:49 +02:00
Kilian Schüttler
bcbd4587f1 RED-9728: remove False Positives from table methods 2024-09-06 11:07:48 +02:00
Maverick Studer
03e321a824 Merge branch 'RED-9947' into 'master'
RED-9947: search term occurrences implementation for local bulk

Closes RED-9947

See merge request redactmanager/redaction-service!504
2024-09-04 11:51:39 +02:00
Maverick Studer
316b4c1d02 RED-9947: search term occurrences implementation for local bulk 2024-09-04 11:51:39 +02:00
Kilian Schüttler
3e0f2254ed Merge branch 'RED-9964-fp' into 'master'
RED-9964: fix errors with images

Closes RED-9964

See merge request redactmanager/redaction-service!506
2024-09-04 09:30:34 +02:00
Kilian Schüttler
cd2bda15aa RED-9964: fix errors with images 2024-09-04 09:30:33 +02:00
Kilian Schüttler
895bc56590 Merge branch 'RED-9964-fp' into 'master'
RED-9964: refactor getMainBody() and getMainBodyTextBlock() in Page

Closes RED-9964

See merge request redactmanager/redaction-service!501
2024-09-02 16:51:13 +02:00
Kilian Schüttler
292869c502 RED-9964: refactor getMainBody() and getMainBodyTextBlock() in Page 2024-09-02 16:51:12 +02:00
Kilian Schüttler
f3cdf46008 Merge branch 'entitylog-migration' into 'master'
entitylog-mapping: add test to map entitylogs to existing DocumentData

See merge request redactmanager/redaction-service!503
2024-09-02 16:24:11 +02:00
Kilian Schüttler
795f8fd31b entitylog-mapping: add test to map entitylogs to existing DocumentData 2024-09-02 16:24:11 +02:00
Kevin Tumma
403e2f4153 Update .gitlab-ci.yml file 2024-08-30 11:22:52 +02:00
Kevin Tumma
5470157468 Update .gitlab-ci.yml file 2024-08-30 10:56:49 +02:00
Kevin Tumma
80aaeea8dd Update .gitlab-ci.yml file 2024-08-30 10:35:20 +02:00
Kevin Tumma
5a5f14127b Update .gitlab-ci.yml file 2024-08-30 10:23:27 +02:00
Kevin Tumma
bc49cc6e8d Update .gitlab-ci.yml file 2024-08-30 09:52:09 +02:00
Maverick Studer
59103d3075 Merge branch 'tenants-retry' into 'master'
Tenants retry logic and queue renames

See merge request redactmanager/redaction-service!500
2024-08-29 16:12:38 +02:00
Maverick Studer
6cf17ef4f3 Tenants retry logic and queue renames 2024-08-29 16:12:37 +02:00
Maverick Studer
40d832fc3a Merge branch 'REVERT-RED-7327' into 'master'
Revert "RED-7327 - Change to group annotation"

Closes RED-7327

See merge request redactmanager/redaction-service!499
2024-08-29 10:18:49 +02:00
Maverick Studer
b9240dec68 Revert "RED-7327 - Change to group annotation" 2024-08-29 10:18:48 +02:00
Kevin Tumma
ea7137b242 Update .gitlab-ci.yml file 2024-08-28 11:09:09 +02:00
Kevin Tumma
8375783ce8 Update .gitlab-ci.yml file 2024-08-28 10:55:28 +02:00
Kilian Schüttler
ebee9f04bd Merge branch 'SPIKE-LLM_NER' into 'master'
Spike: LLM NER

See merge request redactmanager/redaction-service!498
2024-08-27 18:00:59 +02:00
Kilian Schüttler
4dc0a1fbdc Spike: LLM NER 2024-08-27 18:00:59 +02:00
Maverick Studer
5ebe82b7ce Merge branch 'RED-9331' into 'master'
RED-9331: Explore possibilities for fair upload / analysis processing per tenant

Closes RED-9331

See merge request redactmanager/redaction-service!463
2024-08-27 13:22:19 +02:00
Maverick Studer
d1c2d43ffb RED-9331: Explore possibilities for fair upload / analysis processing per tenant 2024-08-27 13:22:18 +02:00
Maverick Studer
004f6cb5f9 Merge branch 'AZURE_NER_FP' into 'master'
Update layout parser version for azure ner service

See merge request redactmanager/redaction-service!497
2024-08-27 10:13:58 +02:00
Maverick Studer
4fc24fdfe3 Update layout parser version for azure ner service 2024-08-27 10:13:58 +02:00
Maverick Studer
9d668f9be1 Merge branch 'AZURE_NER_FP' into 'master'
RED-9918: Azure entity recognition (Spike)

See merge request redactmanager/redaction-service!496
2024-08-26 14:34:45 +02:00
Maverick Studer
4184333506 RED-9918: Azure entity recognition (Spike) 2024-08-26 14:34:44 +02:00
Maverick Studer
062d29c2ea Merge branch 'RED-9865' into 'master'
RED-9865: fix for case 2

Closes RED-9865

See merge request redactmanager/redaction-service!493
2024-08-23 17:01:14 +02:00
Maverick Studer
dafecc35b6 RED-9865: fix for case 2 2024-08-23 17:01:13 +02:00
Andrei Isvoran
5327591d54 Merge branch 'RED-8694-javadoc' into 'master'
RED-8694 - Add more javadoc

Closes RED-8694

See merge request redactmanager/redaction-service!492
2024-08-23 10:26:39 +02:00
Andrei Isvoran
581e367b6e RED-8694 - Add more javadoc 2024-08-23 09:56:59 +03:00
Andrei Isvoran
b58985fa8a Merge branch 'RED-7327-fixes' into 'master'
RED-7327 - Change to group annotation

Closes RED-7327

See merge request redactmanager/redaction-service!491
2024-08-22 12:09:07 +02:00
Andrei Isvoran
cdfa1afcd0 RED-7327 - Change to group annotation 2024-08-22 12:09:07 +02:00
Andrei Isvoran
78990b5555 Merge branch 'RED-7327' into 'master'
RED-7327 - Add group redactions

Closes RED-7327

See merge request redactmanager/redaction-service!486
2024-08-20 10:32:09 +02:00
Andrei Isvoran
2dada39717 RED-7327 - Add group redactions 2024-08-20 10:32:08 +02:00
Dominique Eifländer
66647e45b0 Merge branch 'RED-9837-master' into 'master'
RED-9837: Fixed not working timeout with endless loop in drools then block

Closes RED-9837

See merge request redactmanager/redaction-service!489
2024-08-19 14:08:25 +02:00
Dominique Eifländer
2ae8b0fbce RED-9837: Fixed not working timeout with endless loop in drools then block 2024-08-19 13:18:00 +02:00
Dominique Eifländer
00409584ee Merge branch 'RED-9760-master-rules' into 'master'
RED-9760: Do not check blacklisted keywords in Strings

Closes RED-9760

See merge request redactmanager/redaction-service!487
2024-08-13 11:57:37 +02:00
Dominique Eifländer
8fc1f77688 RED-9760: Do not check blacklisted keywords in Strings 2024-08-13 11:12:36 +02:00
Maverick Studer
594acb4e82 Merge branch 'RED-9782-fix' into 'master'
RED-9782: Automated Analysis should be disabled when uploading a document that...

Closes RED-9782

See merge request redactmanager/redaction-service!485
2024-08-12 18:41:01 +02:00
Maverick Studer
f9c77a3695 RED-9782: Automated Analysis should be disabled when uploading a document that... 2024-08-12 18:41:00 +02:00
Kilian Schüttler
30cd60f702 Merge branch 'hotfix-UOE' into 'master'
hotfix: UOE in ComponentDroolsExecutionService

See merge request redactmanager/redaction-service!483
2024-08-12 15:59:09 +02:00
Kilian Schuettler
6f361f0deb hotfix: UOE in ComponentDroolsExecutionService 2024-08-12 15:22:13 +02:00
Kilian Schüttler
f629448e4b Merge branch 'RED-9869' into 'master'
RED-9869: allow java.text and find ruleIdentifiers with whitespaces/linebreaks

Closes RED-9869

See merge request redactmanager/redaction-service!479
2024-08-12 12:49:47 +02:00
Kilian Schuettler
1b67ea1068 RED-9869: allow java.text and find ruleIdentifiers with whitespaces/linebreaks 2024-08-12 12:34:13 +02:00
Maverick Studer
9dbf5c479f Merge branch 'hotfixes-dm-release-fp' into 'master'
Hotfixes dm release fp

See merge request redactmanager/redaction-service!478
2024-08-12 09:49:07 +02:00
Maverick Studer
c760733e41 Hotfixes dm release fp 2024-08-12 09:49:07 +02:00
Yannik Hampe
99182fba23 Merge branch 'RED-5624-migration' into 'master'
RED-5625: hotfix migration

Closes RED-5624

See merge request redactmanager/redaction-service!476
2024-08-09 14:08:51 +02:00
Yannik Hampe
ac5a7a73b1 RED-5625: hotfix migration 2024-08-09 14:08:51 +02:00
Maverick Studer
9d093addaf Merge branch 'RED-9857' into 'master'
RED-9857: Add new date format

Closes RED-9857

See merge request redactmanager/redaction-service!474
2024-08-09 10:30:24 +02:00
Maverick Studer
56cabd54a2 RED-9857: Add new date format 2024-08-09 10:30:24 +02:00
Christoph Schabert
7edf00f014 Update .gitlab-ci.yml file 2024-08-08 14:00:44 +02:00
Christoph Schabert
3a5072d7af Update .gitlab-ci.yml file 2024-08-08 13:26:08 +02:00
Christoph Schabert
02abe895a1 Update .gitlab-ci.yml file 2024-08-08 13:25:41 +02:00
Christoph Schabert
c29329e3df Update .gitlab-ci.yml file 2024-08-08 12:54:24 +02:00
Christoph Schabert
b8cf0ab005 Update .gitlab-ci.yml file 2024-08-08 12:46:20 +02:00
Christoph Schabert
5a4d60eb03 Update .gitlab-ci.yml file 2024-08-08 12:15:10 +02:00
Christoph Schabert
7bc76d9306 Update .gitlab-ci.yml file 2024-08-08 12:03:36 +02:00
Christoph Schabert
c351a71831 Update .gitlab-ci.yml file 2024-08-08 11:58:03 +02:00
Christoph Schabert
1d135a26fa Update .gitlab-ci.yml file 2024-08-08 11:44:53 +02:00
Christoph Schabert
5e8dc747bb Update .gitlab-ci.yml file 2024-08-08 10:11:32 +02:00
Maverick Studer
5ff3ebc6cb Merge branch 'RED-9782-fp' into 'master'
RED-9782: Automated Analysis should be disabled when uploading a document that...

Closes RED-9782

See merge request redactmanager/redaction-service!472
2024-08-07 12:26:01 +02:00
Maverick Studer
4aac5fb71a RED-9782: Automated Analysis should be disabled when uploading a document that... 2024-08-07 12:26:00 +02:00
Maverick Studer
ae8b82245f Merge branch 'RED-9782-master' into 'master'
RED-9782: Added analysis function that only imports imported redactions

Closes RED-9782

See merge request redactmanager/redaction-service!470
2024-08-07 10:34:29 +02:00
Dominique Eifländer
5abe73cae2 RED-9782: Added analysis function that only imports imported redactions 2024-08-07 09:44:27 +02:00
Kilian Schüttler
b6d1edacba Merge branch 'annotationMode' into 'master'
annotationMode: ignore IDs of manual adds in annotationMode

See merge request redactmanager/redaction-service!465
2024-07-26 14:53:44 +02:00
Kilian Schuettler
80bc8242d0 annotationMode: ignore IDs of manual adds in annotationMode 2024-07-25 17:01:41 +02:00
Andrei Isvoran
2ab536dfee Merge branch 'RED-9140-fp' into 'master'
RED-9140 - Properly distinct between REMOVED and IGNORED Change type

Closes RED-9140

See merge request redactmanager/redaction-service!464
2024-07-23 15:58:26 +02:00
Andrei Isvoran
e09cd59256 RED-9140 - Properly distinct between REMOVED and IGNORED Change type 2024-07-23 15:58:25 +02:00
492 changed files with 154935 additions and 3821960 deletions

View File

@ -7,20 +7,25 @@ include:
ref: 'main'
file: 'ci-templates/gradle_java.yml'
deploy JavaDoc:
publish dependencies:
stage: deploy
tags:
- dind
script:
- echo "Building JavaDoc with gradle version ${BUILDVERSION}"
- echo "Publishing dependencies with gradle version ${BUILDVERSION}"
- gradle -Pversion=${BUILDVERSION} publish
- echo "BUILDVERSION=$(echo ${BUILDVERSION})" >> variables.env
artifacts:
reports:
dotenv: variables.env
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_BRANCH =~ /^release/
- if: $CI_COMMIT_BRANCH =~ /^feature/
- if: $CI_COMMIT_TAG
generateJavaDoc:
stage: build
generate JavaDoc:
stage: deploy
tags:
- dind
script:
@ -35,14 +40,39 @@ generateJavaDoc:
- if: $CI_COMMIT_TAG
pages:
stage: build
stage: deploy
needs:
- generateJavaDoc
- generate JavaDoc
- publish dependencies
- calculate minor version
pages:
path_prefix: "$BUILDVERSION"
script:
- mkdir public
- mv redaction-service-v1/redaction-service-server-v1/javadoc/* public/
- URL=$(echo $BUILDVERSION | sed -e 's|\.|-|g')
- echo "Deploying to ${CI_PAGES_URL}/${URL}"
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
artifacts:
paths:
- public
# Manual job: uploads the generated JavaDoc to the rclone remote "azurejavadocs",
# into the folder javadoc/<VERSION_NAME> of the configured container.
publish JavaDoc to azure:
  image: rclone/rclone:1.67.0
  tags:
    - dind
  stage: deploy
  when: manual
  variables:
    # Target folder name for the upload; defaults to "latest".
    VERSION_NAME: "latest"
  needs:
    - generate JavaDoc
  script:
    - echo "Deploy JavaDoc with version ${VERSION_NAME} to prod"
    # Purge the same folder the copy below writes to, so files left over from an
    # earlier upload of this version do not linger next to the new ones.
    - rclone delete azurejavadocs:/$RCLONE_CONFIG_AZUREJAVADOCS_CONTAINER/javadoc/${VERSION_NAME}
    - rclone copy redaction-service-v1/redaction-service-server-v1/javadoc/ azurejavadocs:/$RCLONE_CONFIG_AZUREJAVADOCS_CONTAINER/javadoc/${VERSION_NAME}/
  rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    - if: $CI_COMMIT_BRANCH =~ /^release/
    - if: $CI_COMMIT_TAG

View File

@ -15,8 +15,13 @@ pmd {
isConsoleOutput = true
}
tasks.checkstyleMain {
exclude("**/data/**") // ignore generated proto files
}
tasks.pmdMain {
pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
exclude("**/data/**") // ignore generated proto files
}
tasks.pmdTest {
@ -28,6 +33,8 @@ tasks.named<Test>("test") {
reports {
junitXml.outputLocation.set(layout.buildDirectory.dir("reports/junit"))
}
minHeapSize = "512m"
maxHeapSize = "2048m"
}
tasks.test {

View File

@ -9,11 +9,14 @@ gradle assemble
# Get the current Git branch
branch=$(git rev-parse --abbrev-ref HEAD)
# Replace any slashes (e.g., in 'feature/' or 'release/') with an underscore
cleaned_branch=$(echo "$branch" | sed 's/\//_/g')
# Get the short commit hash (first 5 characters)
commit_hash=$(git rev-parse --short=5 HEAD)
# Combine branch and commit hash
buildName="${USER}-${branch}-${commit_hash}"
buildName="${USER}-${cleaned_branch}-${commit_hash}"
gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=${buildName}

View File

@ -0,0 +1,35 @@
// Build script for the redaction-service-document module: applies the shared Java
// conventions and Lombok plugins, declares the module's dependencies, and publishes
// the "java" component to the Nexus releases repository.
plugins {
    id("com.iqser.red.service.java-conventions")
    id("io.freefair.lombok") version "8.4"
}

description = "redaction-service-document"

val persistenceServiceVersion = "2.612.0-RED10072.1"

group = "com.knecon.fforesight"

dependencies {
    implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}")
    // Declared as `api` so the protobuf types are visible to consumers of this module.
    api("com.google.protobuf:protobuf-java-util:4.28.3")
    testImplementation("org.junit.jupiter:junit-jupiter-api:5.8.1")
    testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.8.1")
}

publishing {
    publications {
        // One Maven publication, named after the project, built from the Java component.
        create<MavenPublication>(name) {
            from(components["java"])
        }
    }
    repositories {
        maven {
            url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
            credentials {
                // getOrNull() leaves the credential null when the Gradle property is not set.
                username = providers.gradleProperty("mavenUser").getOrNull()
                password = providers.gradleProperty("mavenPassword").getOrNull()
            }
        }
    }
}

View File

@ -0,0 +1,36 @@
package com.iqser.red.service.redaction.v1.server.data;
import static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
import java.io.Serializable;
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.AllDocumentPages;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Groups the page, text, position and structure data that belong to one document.
 *
 * <p>Accessors, builder and constructors are generated by Lombok; all fields are private
 * through {@link FieldDefaults}.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentData implements Serializable {

    // Field order is significant: the all-args constructor takes its parameters in this order.
    AllDocumentPages documentPages;
    AllDocumentTextData documentTextData;
    AllDocumentPositionData documentPositionData;
    DocumentStructureWrapper documentStructureWrapper;


    /**
     * Convenience accessor that unwraps the structure held by the document structure wrapper.
     *
     * @return the value of the wrapper's {@code getDocumentStructure()}
     * @throws NullPointerException if no wrapper has been set on this instance
     */
    public DocumentStructure getDocumentStructure() {

        final DocumentStructureWrapper wrapper = documentStructureWrapper;
        return wrapper.getDocumentStructure();
    }

}

View File

@ -0,0 +1,694 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: DocumentStructure.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class DocumentStructureProto {
private DocumentStructureProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
DocumentStructureProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
public interface DocumentStructureOrBuilder extends
// @@protoc_insertion_point(interface_extends:DocumentStructure)
com.google.protobuf.MessageOrBuilder {
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return Whether the root field is set.
*/
boolean hasRoot();
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return The root.
*/
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot();
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder();
}
/**
* Protobuf type {@code DocumentStructure}
*/
public static final class DocumentStructure extends
com.google.protobuf.GeneratedMessage implements
// @@protoc_insertion_point(message_implements:DocumentStructure)
DocumentStructureOrBuilder {
private static final long serialVersionUID = 0L;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
DocumentStructure.class.getName());
}
// Use DocumentStructure.newBuilder() to construct.
private DocumentStructure(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
super(builder);
}
private DocumentStructure() {
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.class, com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.Builder.class);
}
private int bitField0_;
public static final int ROOT_FIELD_NUMBER = 1;
private com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData root_;
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return Whether the root field is set.
*/
@java.lang.Override
public boolean hasRoot() {
return ((bitField0_ & 0x00000001) != 0);
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return The root.
*/
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot() {
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder() {
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
}
private byte memoizedIsInitialized = -1;
@java.lang.Override
public final boolean isInitialized() {
byte isInitialized = memoizedIsInitialized;
if (isInitialized == 1) return true;
if (isInitialized == 0) return false;
memoizedIsInitialized = 1;
return true;
}
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
if (((bitField0_ & 0x00000001) != 0)) {
output.writeMessage(1, getRoot());
}
getUnknownFields().writeTo(output);
}
@java.lang.Override
public int getSerializedSize() {
int size = memoizedSize;
if (size != -1) return size;
size = 0;
if (((bitField0_ & 0x00000001) != 0)) {
size += com.google.protobuf.CodedOutputStream
.computeMessageSize(1, getRoot());
}
size += getUnknownFields().getSerializedSize();
memoizedSize = size;
return size;
}
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure)) {
return super.equals(obj);
}
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure other = (com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure) obj;
if (hasRoot() != other.hasRoot()) return false;
if (hasRoot()) {
if (!getRoot()
.equals(other.getRoot())) return false;
}
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
return true;
}
@java.lang.Override
public int hashCode() {
if (memoizedHashCode != 0) {
return memoizedHashCode;
}
int hash = 41;
hash = (19 * hash) + getDescriptor().hashCode();
if (hasRoot()) {
hash = (37 * hash) + ROOT_FIELD_NUMBER;
hash = (53 * hash) + getRoot().hashCode();
}
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
@java.lang.Override
public Builder newBuilderForType() { return newBuilder(); }
public static Builder newBuilder() {
return DEFAULT_INSTANCE.toBuilder();
}
public static Builder newBuilder(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure prototype) {
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
}
@java.lang.Override
public Builder toBuilder() {
return this == DEFAULT_INSTANCE
? new Builder() : new Builder().mergeFrom(this);
}
@java.lang.Override
protected Builder newBuilderForType(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
Builder builder = new Builder(parent);
return builder;
}
/**
* Protobuf type {@code DocumentStructure}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessage.Builder<Builder> implements
// @@protoc_insertion_point(builder_implements:DocumentStructure)
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructureOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.class, com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.Builder.class);
}
// Construct using com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.newBuilder()
private Builder() {
maybeForceBuilderInitialization();
}
private Builder(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
super(parent);
maybeForceBuilderInitialization();
}
private void maybeForceBuilderInitialization() {
if (com.google.protobuf.GeneratedMessage
.alwaysUseFieldBuilders) {
getRootFieldBuilder();
}
}
@java.lang.Override
public Builder clear() {
super.clear();
bitField0_ = 0;
root_ = null;
if (rootBuilder_ != null) {
rootBuilder_.dispose();
rootBuilder_ = null;
}
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.internal_static_DocumentStructure_descriptor;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstanceForType() {
return com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.getDefaultInstance();
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure build() {
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure buildPartial() {
com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result = new com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure(this);
if (bitField0_ != 0) { buildPartial0(result); }
onBuilt();
return result;
}
private void buildPartial0(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure result) {
int from_bitField0_ = bitField0_;
int to_bitField0_ = 0;
if (((from_bitField0_ & 0x00000001) != 0)) {
result.root_ = rootBuilder_ == null
? root_
: rootBuilder_.build();
to_bitField0_ |= 0x00000001;
}
result.bitField0_ |= to_bitField0_;
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure) {
return mergeFrom((com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure)other);
} else {
super.mergeFrom(other);
return this;
}
}
public Builder mergeFrom(com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure other) {
if (other == com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure.getDefaultInstance()) return this;
if (other.hasRoot()) {
mergeRoot(other.getRoot());
}
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
return true;
}
/**
 * Reads wire-format tags from {@code input} until a stop condition is hit and merges
 * what they carry into this builder.
 *
 * @param input the stream to read tags and field payloads from
 * @param extensionRegistry registry handed on to nested message parsing; must not be null
 * @return this builder
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is null
 * @throws java.io.IOException if reading fails; an InvalidProtocolBufferException is rethrown via unwrapIOException()
 */
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
// A tag of 0 ends the loop.
case 0:
done = true;
break;
// 10 == (1 << 3) | 2: field number 1 (root) with the length-delimited wire type.
// The payload is parsed straight into the nested root builder and the presence bit is set.
case 10: {
input.readMessage(
getRootFieldBuilder().getBuilder(),
extensionRegistry);
bitField0_ |= 0x00000001;
break;
} // case 10
// Any other tag is delegated to the superclass's unknown-field handling.
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
// Runs on both the success and the failure path.
onChanged();
} // finally
return this;
}
private int bitField0_;
private com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData root_;
private com.google.protobuf.SingleFieldBuilder<
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder> rootBuilder_;
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return Whether the root field is set.
*/
public boolean hasRoot() {
return ((bitField0_ & 0x00000001) != 0);
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
* @return The root.
*/
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData getRoot() {
if (rootBuilder_ == null) {
return root_ == null ? com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
} else {
return rootBuilder_.getMessage();
}
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public Builder setRoot(com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData value) {
if (rootBuilder_ == null) {
if (value == null) {
throw new NullPointerException();
}
root_ = value;
} else {
rootBuilder_.setMessage(value);
}
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public Builder setRoot(
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder builderForValue) {
if (rootBuilder_ == null) {
root_ = builderForValue.build();
} else {
rootBuilder_.setMessage(builderForValue.build());
}
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*
* Merges {@code value} into the current root. Three paths exist:
* without a field builder and with a non-default root already set, the value is merged
* through {@code getRootBuilder()}; without a field builder and no usable root, the value
* simply replaces {@code root_}; with a field builder, the merge is delegated to it.
*
* @param value the EntryData to merge into the root
* @return this builder
*/
public Builder mergeRoot(com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData value) {
if (rootBuilder_ == null) {
if (((bitField0_ & 0x00000001) != 0) &&
root_ != null &&
root_ != com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance()) {
// getRootBuilder() sets the presence bit and calls onChanged() itself.
getRootBuilder().mergeFrom(value);
} else {
root_ = value;
}
} else {
rootBuilder_.mergeFrom(value);
}
// Only the plain-replacement path leaves root_ non-null here; the field builder
// nulls root_ when it is created (see getRootFieldBuilder()).
if (root_ != null) {
bitField0_ |= 0x00000001;
onChanged();
}
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public Builder clearRoot() {
bitField0_ = (bitField0_ & ~0x00000001);
root_ = null;
if (rootBuilder_ != null) {
rootBuilder_.dispose();
rootBuilder_ = null;
}
onChanged();
return this;
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder getRootBuilder() {
bitField0_ |= 0x00000001;
onChanged();
return getRootFieldBuilder().getBuilder();
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*/
public com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder getRootOrBuilder() {
if (rootBuilder_ != null) {
return rootBuilder_.getMessageOrBuilder();
} else {
return root_ == null ?
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.getDefaultInstance() : root_;
}
}
/**
* <pre>
* The root EntryData represents the Document.
* </pre>
*
* <code>.EntryData root = 1;</code>
*
* Lazily creates the SingleFieldBuilder for the root field. On first use the builder is
* seeded with the current {@code getRoot()} value, after which {@code root_} is cleared so
* the field builder is the only holder of the root state.
*
* @return the (possibly newly created) field builder for root
*/
private com.google.protobuf.SingleFieldBuilder<
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder>
getRootFieldBuilder() {
if (rootBuilder_ == null) {
rootBuilder_ = new com.google.protobuf.SingleFieldBuilder<
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData.Builder, com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryDataOrBuilder>(
getRoot(),
getParentForChildren(),
isClean());
// From here on getRoot()/getRootOrBuilder() read through rootBuilder_ instead of root_.
root_ = null;
}
return rootBuilder_;
}
// @@protoc_insertion_point(builder_scope:DocumentStructure)
}
// @@protoc_insertion_point(class_scope:DocumentStructure)
private static final com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure();
}
public static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstance() {
return DEFAULT_INSTANCE;
}
// Shared parser instance returned by parser() and getParserForType().
// parsePartialFrom() feeds the input through a fresh Builder and returns buildPartial();
// every failure is surfaced as an InvalidProtocolBufferException that carries whatever
// was built so far via setUnfinishedMessage().
private static final com.google.protobuf.Parser<DocumentStructure>
PARSER = new com.google.protobuf.AbstractParser<DocumentStructure>() {
@java.lang.Override
public DocumentStructure parsePartialFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
Builder builder = newBuilder();
try {
builder.mergeFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
// Already the right exception type: just attach the partial message.
throw e.setUnfinishedMessage(builder.buildPartial());
} catch (com.google.protobuf.UninitializedMessageException e) {
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
} catch (java.io.IOException e) {
// Wrap plain I/O failures so callers see a single exception type.
throw new com.google.protobuf.InvalidProtocolBufferException(e)
.setUnfinishedMessage(builder.buildPartial());
}
return builder.buildPartial();
}
};
public static com.google.protobuf.Parser<DocumentStructure> parser() {
return PARSER;
}
@java.lang.Override
public com.google.protobuf.Parser<DocumentStructure> getParserForType() {
return PARSER;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_DocumentStructure_descriptor;
private static final
com.google.protobuf.GeneratedMessage.FieldAccessorTable
internal_static_DocumentStructure_fieldAccessorTable;
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\027DocumentStructure.proto\032\017EntryData.pro" +
"to\"-\n\021DocumentStructure\022\030\n\004root\030\001 \001(\0132\n." +
"EntryDataBH\n.com.iqser.red.service.redac" +
"tion.v1.server.dataB\026DocumentStructurePr" +
"otob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.getDescriptor(),
});
internal_static_DocumentStructure_descriptor =
getDescriptor().getMessageTypes().get(0);
internal_static_DocumentStructure_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_DocumentStructure_descriptor,
new java.lang.String[] { "Root", });
descriptor.resolveAllFeaturesImmutable();
com.iqser.red.service.redaction.v1.server.data.EntryDataProto.getDescriptor();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,115 @@
package com.iqser.red.service.redaction.v1.server.data;
import static com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
import static com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
import java.awt.geom.Rectangle2D;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Serializable wrapper around the protobuf {@link DocumentStructure} tree.
 *
 * <p>Besides holding the structure it offers path-based lookup of entries, a depth-first
 * stream over all entries, the property-key constants used for the different node kinds and
 * parsers for the string encodings of rectangles and representation vectors.
 */
@Getter
@AllArgsConstructor
public class DocumentStructureWrapper implements Serializable {

    private final DocumentStructure documentStructure;

    /** Property keys used for table entries. */
    public static class TableProperties implements Serializable {

        public static final String NUMBER_OF_ROWS = "numberOfRows";
        public static final String NUMBER_OF_COLS = "numberOfCols";

    }

    /** Property keys used for image entries. */
    public static class ImageProperties implements Serializable {

        public static final String TRANSPARENT = "transparent";
        public static final String IMAGE_TYPE = "imageType";
        public static final String POSITION = "position";
        public static final String ID = "id";
        public static final String REPRESENTATION_HASH = "representationHash";

    }

    /** Property keys used for table cell entries. */
    public static class TableCellProperties implements Serializable {

        public static final String B_BOX = "bBox";
        public static final String ROW = "row";
        public static final String COL = "col";
        public static final String HEADER = "header";

    }

    /** Property keys used for duplicate paragraph entries. */
    public static class DuplicateParagraphProperties implements Serializable {

        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";

    }

    /** Separator between the components of a serialized rectangle. */
    public static final String RECTANGLE_DELIMITER = ";";


    /**
     * Parses a rectangle from its {@link #RECTANGLE_DELIMITER}-separated string form.
     *
     * <p>The first four components are passed, in order, to
     * {@link Rectangle2D.Float#Float(float, float, float, float)} (x, y, width, height).
     * All components are parsed; any beyond the fourth are ignored.
     *
     * @param bBox the serialized rectangle, e.g. {@code "1.0;2.0;30.5;12.0"}
     * @return the parsed rectangle
     * @throws NumberFormatException     if a component is not a valid float
     * @throws IndexOutOfBoundsException if fewer than four components are present
     */
    public static Rectangle2D parseRectangle2D(String bBox) {

        List<Float> components = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
                .map(Float::parseFloat)
                .toList();
        return new Rectangle2D.Float(components.get(0), components.get(1), components.get(2), components.get(3));
    }


    /**
     * Parses a representation vector whose numbers are separated by commas and/or whitespace.
     *
     * <p>Empty tokens are skipped, so empty input and input that starts with a separator
     * (for example {@code " 1.0, 2.0"}) are accepted; previously both failed with a
     * {@link NumberFormatException} because {@link String#split(String)} yields a leading
     * empty string in those cases.
     *
     * @param representationHash the serialized vector
     * @return the parsed values in input order; empty if the input holds no numbers
     * @throws NumberFormatException if a non-empty token is not a valid double
     */
    public static double[] parseRepresentationVector(String representationHash) {

        return Arrays.stream(representationHash.split("[,\\s]+"))
                .filter(token -> !token.isEmpty())
                .mapToDouble(Double::parseDouble)
                .toArray();
    }


    /**
     * Resolves an entry by its path of child indices, starting at the root.
     *
     * @param tocId child index per tree level; an empty list addresses the root itself
     * @return the entry found by following the indices
     * @throws IndexOutOfBoundsException if an index does not exist at its level
     */
    public EntryData get(List<Integer> tocId) {

        EntryData entry = documentStructure.getRoot();
        for (int childIndex : tocId) {
            entry = entry.getChildrenList()
                    .get(childIndex);
        }
        return entry;
    }


    /**
     * Streams the root and all of its descendants in depth-first pre-order.
     *
     * @return a stream over every entry of the structure
     */
    public Stream<EntryData> streamAllEntries() {

        return flatten(documentStructure.getRoot());
    }


    /** Renders every entry's {@code toString()} on its own line. */
    @Override
    public String toString() {

        return String.join("\n",
                           streamAllEntries().map(EntryData::toString)
                                   .toList());
    }


    /** Emits {@code entry} followed by the recursively flattened subtrees of its children. */
    private static Stream<EntryData> flatten(EntryData entry) {

        return Stream.concat(Stream.of(entry),
                             entry.getChildrenList()
                                     .stream()
                                     .flatMap(DocumentStructureWrapper::flatten));
    }

}

View File

@ -0,0 +1,176 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: LayoutEngine.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class LayoutEngineProto {
private LayoutEngineProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
LayoutEngineProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code LayoutEngine}
*/
public enum LayoutEngine
implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>ALGORITHM = 0;</code>
*/
ALGORITHM(0),
/**
* <code>AI = 1;</code>
*/
AI(1),
/**
* <code>OUTLINE = 2;</code>
*/
OUTLINE(2),
UNRECOGNIZED(-1),
;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
LayoutEngine.class.getName());
}
/**
* <code>ALGORITHM = 0;</code>
*/
public static final int ALGORITHM_VALUE = 0;
/**
* <code>AI = 1;</code>
*/
public static final int AI_VALUE = 1;
/**
* <code>OUTLINE = 2;</code>
*/
public static final int OUTLINE_VALUE = 2;
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalArgumentException(
"Can't get the number of an unknown enum value.");
}
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@java.lang.Deprecated
public static LayoutEngine valueOf(int value) {
return forNumber(value);
}
/**
* Maps a numeric wire value to its enum constant.
* Note that unknown numbers yield {@code null}, not {@code UNRECOGNIZED}.
*
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value, or {@code null} if
* the value is none of 0 (ALGORITHM), 1 (AI) or 2 (OUTLINE).
*/
public static LayoutEngine forNumber(int value) {
switch (value) {
case 0: return ALGORITHM;
case 1: return AI;
case 2: return OUTLINE;
default: return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
LayoutEngine> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap<LayoutEngine>() {
public LayoutEngine findValueByNumber(int number) {
return LayoutEngine.forNumber(number);
}
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalStateException(
"Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto.getDescriptor().getEnumTypes().get(0);
}
private static final LayoutEngine[] VALUES = values();
/**
 * Resolves the enum constant that belongs to a value descriptor.
 *
 * @param desc descriptor of an enum value; its type must be this enum's descriptor
 * @return {@code UNRECOGNIZED} if the descriptor's index is -1, otherwise the constant
 * stored at that index in {@code VALUES}
 * @throws java.lang.IllegalArgumentException if {@code desc} belongs to a different enum type
 */
public static LayoutEngine valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
// Identity comparison: the descriptor must be the very instance returned by getDescriptor().
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private LayoutEngine(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:LayoutEngine)
}
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\022LayoutEngine.proto*2\n\014LayoutEngine\022\r\n\t" +
"ALGORITHM\020\000\022\006\n\002AI\020\001\022\013\n\007OUTLINE\020\002BC\n.com." +
"iqser.red.service.redaction.v1.server.da" +
"taB\021LayoutEngineProtob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,261 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: NodeType.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class NodeTypeProto {
private NodeTypeProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
NodeTypeProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
/**
* Protobuf enum {@code NodeType}
*/
public enum NodeType
implements com.google.protobuf.ProtocolMessageEnum {
/**
* <code>DOCUMENT = 0;</code>
*/
DOCUMENT(0),
/**
* <code>SECTION = 1;</code>
*/
SECTION(1),
/**
* <code>SUPER_SECTION = 2;</code>
*/
SUPER_SECTION(2),
/**
* <code>HEADLINE = 3;</code>
*/
HEADLINE(3),
/**
* <code>PARAGRAPH = 4;</code>
*/
PARAGRAPH(4),
/**
* <code>TABLE = 5;</code>
*/
TABLE(5),
/**
* <code>TABLE_CELL = 6;</code>
*/
TABLE_CELL(6),
/**
* <code>IMAGE = 7;</code>
*/
IMAGE(7),
/**
* <code>HEADER = 8;</code>
*/
HEADER(8),
/**
* <code>FOOTER = 9;</code>
*/
FOOTER(9),
/**
* <code>TABLE_OF_CONTENTS = 10;</code>
*/
TABLE_OF_CONTENTS(10),
/**
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
*/
TABLE_OF_CONTENTS_ITEM(11),
UNRECOGNIZED(-1),
;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
NodeType.class.getName());
}
/**
* <code>DOCUMENT = 0;</code>
*/
public static final int DOCUMENT_VALUE = 0;
/**
* <code>SECTION = 1;</code>
*/
public static final int SECTION_VALUE = 1;
/**
* <code>SUPER_SECTION = 2;</code>
*/
public static final int SUPER_SECTION_VALUE = 2;
/**
* <code>HEADLINE = 3;</code>
*/
public static final int HEADLINE_VALUE = 3;
/**
* <code>PARAGRAPH = 4;</code>
*/
public static final int PARAGRAPH_VALUE = 4;
/**
* <code>TABLE = 5;</code>
*/
public static final int TABLE_VALUE = 5;
/**
* <code>TABLE_CELL = 6;</code>
*/
public static final int TABLE_CELL_VALUE = 6;
/**
* <code>IMAGE = 7;</code>
*/
public static final int IMAGE_VALUE = 7;
/**
* <code>HEADER = 8;</code>
*/
public static final int HEADER_VALUE = 8;
/**
* <code>FOOTER = 9;</code>
*/
public static final int FOOTER_VALUE = 9;
/**
* <code>TABLE_OF_CONTENTS = 10;</code>
*/
public static final int TABLE_OF_CONTENTS_VALUE = 10;
/**
* <code>TABLE_OF_CONTENTS_ITEM = 11;</code>
*/
public static final int TABLE_OF_CONTENTS_ITEM_VALUE = 11;
public final int getNumber() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalArgumentException(
"Can't get the number of an unknown enum value.");
}
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@java.lang.Deprecated
public static NodeType valueOf(int value) {
return forNumber(value);
}
/**
* Maps a numeric wire value to its enum constant.
* Note that unknown numbers yield {@code null}, not {@code UNRECOGNIZED}.
*
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value, or {@code null} if
* the value lies outside the declared range 0 (DOCUMENT) to 11 (TABLE_OF_CONTENTS_ITEM).
*/
public static NodeType forNumber(int value) {
switch (value) {
case 0: return DOCUMENT;
case 1: return SECTION;
case 2: return SUPER_SECTION;
case 3: return HEADLINE;
case 4: return PARAGRAPH;
case 5: return TABLE;
case 6: return TABLE_CELL;
case 7: return IMAGE;
case 8: return HEADER;
case 9: return FOOTER;
case 10: return TABLE_OF_CONTENTS;
case 11: return TABLE_OF_CONTENTS_ITEM;
default: return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap<NodeType>
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
NodeType> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap<NodeType>() {
public NodeType findValueByNumber(int number) {
return NodeType.forNumber(number);
}
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
if (this == UNRECOGNIZED) {
throw new java.lang.IllegalStateException(
"Can't get the descriptor of an unrecognized enum value.");
}
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.NodeTypeProto.getDescriptor().getEnumTypes().get(0);
}
private static final NodeType[] VALUES = values();
/**
 * Resolves an enum constant from its protobuf value descriptor.
 *
 * @param desc descriptor of an enum value; must belong to this enum's descriptor
 * @return {@code UNRECOGNIZED} when the descriptor index is {@code -1},
 *         otherwise the constant at that index in {@code VALUES}
 * @throws java.lang.IllegalArgumentException if {@code desc} describes a different enum type
 */
public static NodeType valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
// Identity comparison: the descriptor must be the very instance owned by this enum.
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
if (desc.getIndex() == -1) {
return UNRECOGNIZED;
}
return VALUES[desc.getIndex()];
}
private final int value;
private NodeType(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:NodeType)
}
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\016NodeType.proto*\306\001\n\010NodeType\022\014\n\010DOCUMEN" +
"T\020\000\022\013\n\007SECTION\020\001\022\021\n\rSUPER_SECTION\020\002\022\014\n\010H" +
"EADLINE\020\003\022\r\n\tPARAGRAPH\020\004\022\t\n\005TABLE\020\005\022\016\n\nT" +
"ABLE_CELL\020\006\022\t\n\005IMAGE\020\007\022\n\n\006HEADER\020\010\022\n\n\006FO" +
"OTER\020\t\022\025\n\021TABLE_OF_CONTENTS\020\n\022\032\n\026TABLE_O" +
"F_CONTENTS_ITEM\020\013B?\n.com.iqser.red.servi" +
"ce.redaction.v1.server.dataB\rNodeTypePro" +
"tob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,606 @@
// Generated by the protocol buffer compiler. DO NOT EDIT!
// NO CHECKED-IN PROTOBUF GENCODE
// source: Range.proto
// Protobuf Java Version: 4.28.3
package com.iqser.red.service.redaction.v1.server.data;
public final class RangeProto {
private RangeProto() {}
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
RangeProto.class.getName());
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
public interface RangeOrBuilder extends
// @@protoc_insertion_point(interface_extends:Range)
com.google.protobuf.MessageOrBuilder {
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return The start.
*/
int getStart();
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return The end.
*/
int getEnd();
}
/**
* Protobuf type {@code Range}
*/
public static final class Range extends
com.google.protobuf.GeneratedMessage implements
// @@protoc_insertion_point(message_implements:Range)
RangeOrBuilder {
private static final long serialVersionUID = 0L;
static {
com.google.protobuf.RuntimeVersion.validateProtobufGencodeVersion(
com.google.protobuf.RuntimeVersion.RuntimeDomain.PUBLIC,
/* major= */ 4,
/* minor= */ 28,
/* patch= */ 3,
/* suffix= */ "",
Range.class.getName());
}
// Use Range.newBuilder() to construct.
private Range(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
super(builder);
}
private Range() {
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.class, com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.Builder.class);
}
public static final int START_FIELD_NUMBER = 1;
private int start_ = 0;
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return The start.
*/
@java.lang.Override
public int getStart() {
return start_;
}
public static final int END_FIELD_NUMBER = 2;
private int end_ = 0;
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return The end.
*/
@java.lang.Override
public int getEnd() {
return end_;
}
private byte memoizedIsInitialized = -1;
@java.lang.Override
public final boolean isInitialized() {
byte isInitialized = memoizedIsInitialized;
if (isInitialized == 1) return true;
if (isInitialized == 0) return false;
memoizedIsInitialized = 1;
return true;
}
/**
 * Serializes this message to {@code output}.
 * <p>
 * {@code start} (field 1) and {@code end} (field 2) are written only when they
 * are non-zero; any unknown fields carried by this message are written last.
 *
 * @param output the stream to write to
 * @throws java.io.IOException if writing to {@code output} fails
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
if (start_ != 0) {
output.writeInt32(1, start_);
}
if (end_ != 0) {
output.writeInt32(2, end_);
}
getUnknownFields().writeTo(output);
}
/**
 * Computes the serialized size of this message and caches it in {@code memoizedSize}.
 * <p>
 * Mirrors {@code writeTo}: zero-valued {@code start}/{@code end} contribute nothing,
 * and the size of the unknown fields is added at the end.
 *
 * @return the size as summed from the set fields and the unknown fields
 */
@java.lang.Override
public int getSerializedSize() {
int size = memoizedSize;
// -1 marks "not computed yet"; any other value is the cached result.
if (size != -1) return size;
size = 0;
if (start_ != 0) {
size += com.google.protobuf.CodedOutputStream
.computeInt32Size(1, start_);
}
if (end_ != 0) {
size += com.google.protobuf.CodedOutputStream
.computeInt32Size(2, end_);
}
size += getUnknownFields().getSerializedSize();
memoizedSize = size;
return size;
}
/**
 * Compares this message with another object.
 * <p>
 * Two {@code Range} messages are equal when {@code start}, {@code end} and the
 * unknown fields all match. Objects that are not a {@code Range} are delegated
 * to {@code super.equals}.
 *
 * @param obj the object to compare with
 * @return {@code true} if {@code obj} is considered equal to this message
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof com.iqser.red.service.redaction.v1.server.data.RangeProto.Range)) {
return super.equals(obj);
}
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range other = (com.iqser.red.service.redaction.v1.server.data.RangeProto.Range) obj;
if (getStart()
!= other.getStart()) return false;
if (getEnd()
!= other.getEnd()) return false;
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
return true;
}
/**
 * Returns a hash code derived from the descriptor, both field numbers and
 * values, and the unknown fields. The result is cached in {@code memoizedHashCode}.
 *
 * @return the (possibly cached) hash code
 */
@java.lang.Override
public int hashCode() {
// 0 is treated as "not computed yet"; a hash that happens to be 0 is simply recomputed.
if (memoizedHashCode != 0) {
return memoizedHashCode;
}
int hash = 41;
hash = (19 * hash) + getDescriptor().hashCode();
hash = (37 * hash) + START_FIELD_NUMBER;
hash = (53 * hash) + getStart();
hash = (37 * hash) + END_FIELD_NUMBER;
hash = (53 * hash) + getEnd();
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input);
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessage
.parseWithIOException(PARSER, input, extensionRegistry);
}
@java.lang.Override
public Builder newBuilderForType() { return newBuilder(); }
public static Builder newBuilder() {
return DEFAULT_INSTANCE.toBuilder();
}
public static Builder newBuilder(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range prototype) {
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
}
@java.lang.Override
public Builder toBuilder() {
return this == DEFAULT_INSTANCE
? new Builder() : new Builder().mergeFrom(this);
}
@java.lang.Override
protected Builder newBuilderForType(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
Builder builder = new Builder(parent);
return builder;
}
/**
* Protobuf type {@code Range}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessage.Builder<Builder> implements
// @@protoc_insertion_point(builder_implements:Range)
com.iqser.red.service.redaction.v1.server.data.RangeProto.RangeOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.class, com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.Builder.class);
}
// Construct using com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.newBuilder()
private Builder() {
}
private Builder(
com.google.protobuf.GeneratedMessage.BuilderParent parent) {
super(parent);
}
@java.lang.Override
public Builder clear() {
super.clear();
bitField0_ = 0;
start_ = 0;
end_ = 0;
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.internal_static_Range_descriptor;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstanceForType() {
return com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.getDefaultInstance();
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range build() {
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range buildPartial() {
com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result = new com.iqser.red.service.redaction.v1.server.data.RangeProto.Range(this);
if (bitField0_ != 0) { buildPartial0(result); }
onBuilt();
return result;
}
/**
 * Copies the fields that were explicitly set on this builder into {@code result}.
 * <p>
 * Bit {@code 0x1} of {@code bitField0_} tracks {@code start}, bit {@code 0x2}
 * tracks {@code end}; fields whose bit is clear keep the value {@code result}
 * already has.
 *
 * @param result the message instance being populated
 */
private void buildPartial0(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range result) {
int from_bitField0_ = bitField0_;
if (((from_bitField0_ & 0x00000001) != 0)) {
result.start_ = start_;
}
if (((from_bitField0_ & 0x00000002) != 0)) {
result.end_ = end_;
}
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof com.iqser.red.service.redaction.v1.server.data.RangeProto.Range) {
return mergeFrom((com.iqser.red.service.redaction.v1.server.data.RangeProto.Range)other);
} else {
super.mergeFrom(other);
return this;
}
}
/**
 * Merges the contents of {@code other} into this builder.
 * <p>
 * Non-zero {@code start}/{@code end} values of {@code other} overwrite the
 * builder's values; zero values are skipped, so they never reset a field.
 * Unknown fields of {@code other} are merged as well. Merging the default
 * instance is a no-op.
 *
 * @param other the message to merge from
 * @return this builder for chaining
 */
public Builder mergeFrom(com.iqser.red.service.redaction.v1.server.data.RangeProto.Range other) {
// Identity check against the shared default instance: nothing to merge.
if (other == com.iqser.red.service.redaction.v1.server.data.RangeProto.Range.getDefaultInstance()) return this;
if (other.getStart() != 0) {
setStart(other.getStart());
}
if (other.getEnd() != 0) {
setEnd(other.getEnd());
}
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
return true;
}
/**
 * Parses fields from {@code input} and merges them into this builder.
 * <p>
 * Reads tags until tag {@code 0} or an end-group tag is encountered.
 * Tag {@code 8} sets {@code start}, tag {@code 16} sets {@code end}; every
 * other tag is handed to {@code parseUnknownField}. {@code onChanged()} is
 * always invoked, even when parsing fails part-way through.
 *
 * @param input the stream to read from
 * @param extensionRegistry must not be {@code null}
 * @return this builder for chaining
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is {@code null}
 * @throws java.io.IOException if reading fails or the data is malformed
 */
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
case 0:
done = true;
break;
case 8: {
// Tag 8 = field number 1 (start), varint wire type.
start_ = input.readInt32();
bitField0_ |= 0x00000001;
break;
} // case 8
case 16: {
// Tag 16 = field number 2 (end), varint wire type.
end_ = input.readInt32();
bitField0_ |= 0x00000002;
break;
} // case 16
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
private int bitField0_;
private int start_ ;
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return The start.
*/
@java.lang.Override
public int getStart() {
return start_;
}
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @param value The start to set.
* @return This builder for chaining.
*/
public Builder setStart(int value) {
start_ = value;
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* <pre>
* A start index.
* </pre>
*
* <code>int32 start = 1;</code>
* @return This builder for chaining.
*/
public Builder clearStart() {
bitField0_ = (bitField0_ & ~0x00000001);
start_ = 0;
onChanged();
return this;
}
private int end_ ;
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return The end.
*/
@java.lang.Override
public int getEnd() {
return end_;
}
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @param value The end to set.
* @return This builder for chaining.
*/
public Builder setEnd(int value) {
end_ = value;
bitField0_ |= 0x00000002;
onChanged();
return this;
}
/**
* <pre>
* An end index.
* </pre>
*
* <code>int32 end = 2;</code>
* @return This builder for chaining.
*/
public Builder clearEnd() {
bitField0_ = (bitField0_ & ~0x00000002);
end_ = 0;
onChanged();
return this;
}
// @@protoc_insertion_point(builder_scope:Range)
}
// @@protoc_insertion_point(class_scope:Range)
private static final com.iqser.red.service.redaction.v1.server.data.RangeProto.Range DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new com.iqser.red.service.redaction.v1.server.data.RangeProto.Range();
}
public static com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstance() {
return DEFAULT_INSTANCE;
}
private static final com.google.protobuf.Parser<Range>
PARSER = new com.google.protobuf.AbstractParser<Range>() {
@java.lang.Override
public Range parsePartialFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
Builder builder = newBuilder();
try {
builder.mergeFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.setUnfinishedMessage(builder.buildPartial());
} catch (com.google.protobuf.UninitializedMessageException e) {
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
} catch (java.io.IOException e) {
throw new com.google.protobuf.InvalidProtocolBufferException(e)
.setUnfinishedMessage(builder.buildPartial());
}
return builder.buildPartial();
}
};
public static com.google.protobuf.Parser<Range> parser() {
return PARSER;
}
@java.lang.Override
public com.google.protobuf.Parser<Range> getParserForType() {
return PARSER;
}
@java.lang.Override
public com.iqser.red.service.redaction.v1.server.data.RangeProto.Range getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_Range_descriptor;
private static final
com.google.protobuf.GeneratedMessage.FieldAccessorTable
internal_static_Range_fieldAccessorTable;
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\013Range.proto\"#\n\005Range\022\r\n\005start\030\001 \001(\005\022\013\n" +
"\003end\030\002 \001(\005B<\n.com.iqser.red.service.reda" +
"ction.v1.server.dataB\nRangeProtob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
internal_static_Range_descriptor =
getDescriptor().getMessageTypes().get(0);
internal_static_Range_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_Range_descriptor,
new java.lang.String[] { "Start", "End", });
descriptor.resolveAllFeaturesImmutable();
}
// @@protoc_insertion_point(outer_class_scope)
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable description of a single document page.
 * <p>
 * Lives in the {@code old} package and is deprecated. Lombok generates the
 * accessors, builder and constructors; {@code @FieldDefaults} makes all fields private.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentPage implements Serializable {
// Page number within the document.
int number;
// Page height (unit not visible from this class; confirm against the producer).
int height;
// Page width (unit not visible from this class; confirm against the producer).
int width;
// Page rotation (presumably degrees; confirm against the producer).
int rotation;
}

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable position data belonging to one text block.
 * <p>
 * Lives in the {@code old} package and is deprecated. Lombok generates the
 * accessors, builder and constructors; {@code @FieldDefaults} makes all fields private.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentPositionData implements Serializable {
// Identifier of this record (presumably the atomic text block id; confirm against the producer).
Long id;
// Lookup from an index in the text to an index into {@code positions}.
int[] stringIdxToPositionIdx;
// One float array per position (presumably the four values of a rectangle; confirm against the producer).
float[][] positions;
}

View File

@ -0,0 +1,158 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.awt.geom.Rectangle2D;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable representation of the document tree.
 * <p>
 * The tree is made of {@link EntryData} nodes hanging off {@link #root}. The nested
 * {@code *Properties} classes only hold the string keys used in
 * {@link EntryData#getProperties()}; the static {@code parse*} helpers decode the
 * string values stored under some of those keys.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentStructure implements Serializable {

    /** Root entry of the tree. */
    EntryData root;

    /** Property keys used by table entries. */
    public static class TableProperties implements Serializable {

        public static final String NUMBER_OF_ROWS = "numberOfRows";
        public static final String NUMBER_OF_COLS = "numberOfCols";

    }

    /** Property keys used by image entries. */
    public static class ImageProperties implements Serializable {

        public static final String TRANSPARENT = "transparent";
        public static final String IMAGE_TYPE = "imageType";
        public static final String POSITION = "position";
        public static final String ID = "id";
        public static final String REPRESENTATION_HASH = "representationHash";

    }

    /** Property keys used by table cell entries. */
    public static class TableCellProperties implements Serializable {

        public static final String B_BOX = "bBox";
        public static final String ROW = "row";
        public static final String COL = "col";
        public static final String HEADER = "header";

    }

    /** Property keys used by duplicated paragraph entries. */
    public static class DuplicateParagraphProperties implements Serializable {

        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";

    }

    /** Separator between the four numeric components of a serialized rectangle. */
    public static final String RECTANGLE_DELIMITER = ";";


    /**
     * Parses a rectangle serialized as four floats separated by {@link #RECTANGLE_DELIMITER}.
     * The values are passed to {@link Rectangle2D.Float} in order (x, y, width, height).
     *
     * @param bBox the serialized rectangle, e.g. {@code "1.0;2.0;3.0;4.0"}
     * @return the parsed rectangle
     * @throws NumberFormatException if a component is not a valid float
     * @throws IndexOutOfBoundsException if fewer than four components are present
     */
    public static Rectangle2D parseRectangle2D(String bBox) {

        List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
                .map(Float::parseFloat)
                .toList();
        return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
    }


    /**
     * Parses a list of doubles separated by commas and/or whitespace.
     *
     * @param representationHash the serialized vector
     * @return the parsed values in input order
     * @throws NumberFormatException if a token is not a valid double
     */
    public static double[] parseRepresentationVector(String representationHash) {

        String[] stringArray = representationHash.split("[,\\s]+");
        double[] doubleArray = new double[stringArray.length];
        for (int i = 0; i < stringArray.length; i++) {
            doubleArray[i] = Double.parseDouble(stringArray[i]);
        }

        return doubleArray;
    }


    /**
     * Looks up an entry by its path of child indices.
     *
     * @param tocId child index to follow on each level, starting below the root;
     *              an empty list addresses the root itself
     * @return the entry found at that path
     */
    public EntryData get(List<Integer> tocId) {

        EntryData entry = root;
        for (int id : tocId) {
            entry = entry.children.get(id);
        }
        return entry;
    }


    /**
     * Streams every entry of the tree exactly once, depth-first, parents before children.
     * <p>
     * The previous implementation flattened the root and then each of the root's
     * children again, which emitted every non-root entry more than once.
     *
     * @return all entries starting with the root, or an empty stream if there is no root
     */
    public Stream<EntryData> streamAllEntries() {

        if (root == null) {
            return Stream.empty();
        }
        return flatten(root);
    }


    /**
     * Renders one line per entry in {@link #streamAllEntries()} order.
     */
    @Override
    public String toString() {

        return String.join("\n",
                streamAllEntries().map(EntryData::toString)
                        .toList());
    }


    /** Returns the entry followed by all of its descendants in depth-first order. */
    private static Stream<EntryData> flatten(EntryData entry) {

        // Entries created through the no-args constructor or builder may have no child list.
        if (entry.children == null || entry.children.isEmpty()) {
            return Stream.of(entry);
        }
        return Stream.concat(Stream.of(entry),
                entry.children.stream()
                        .flatMap(DocumentStructure::flatten));
    }


    /**
     * A single node of the legacy document tree.
     */
    @Data
    @Builder
    @NoArgsConstructor
    @AllArgsConstructor
    @FieldDefaults(level = AccessLevel.PRIVATE)
    public static class EntryData implements Serializable {

        NodeType type;
        int[] treeId;
        Long[] atomicBlockIds;
        Long[] pageNumbers;
        Map<String, String> properties;
        List<EntryData> children;
        Set<LayoutEngine> engines;


        /**
         * Formats the entry as {@code [id,id,...]: <type> atbs = <number of atomic block ids>}.
         * An empty or missing tree id is rendered as {@code []}, and a missing
         * atomic block array counts as zero blocks.
         */
        @Override
        public String toString() {

            StringBuilder sb = new StringBuilder("[");
            if (treeId != null) {
                for (int i = 0; i < treeId.length; i++) {
                    // Separator goes before every element but the first, so nothing needs trimming afterwards.
                    if (i > 0) {
                        sb.append(",");
                    }
                    sb.append(treeId[i]);
                }
            }
            sb.append("]: ");
            sb.append(type);
            sb.append(" atbs = ");
            sb.append(atomicBlockIds == null ? 0 : atomicBlockIds.length);

            return sb.toString();
        }

    }

}

View File

@ -0,0 +1,28 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Legacy, Java-serializable text data of one text block.
 * <p>
 * Lives in the {@code old} package and is deprecated. Lombok generates the
 * accessors, builder and constructors; {@code @FieldDefaults} makes all fields private.
 */
@Deprecated
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentTextData implements Serializable {
// Identifier of this record (presumably the atomic text block id; confirm against the producer).
Long id;
// Number of the page the text is located on.
Long page;
// The text content used for searching.
String searchText;
// Ordinal of this block on its page.
int numberOnPage;
// Start and end of the text range (presumably offsets into the whole document text; confirm).
int start;
int end;
// Line break positions (presumably relative offsets into searchText; confirm).
int[] lineBreaks;
}

View File

@ -0,0 +1,8 @@
package com.iqser.red.service.redaction.v1.server.data.old;
/**
 * Legacy enumeration of layout engines, used as the element type of
 * {@code DocumentStructure.EntryData#engines} in this package.
 * <p>
 * NOTE(review): the exact meaning of each constant is not visible here;
 * presumably it names the component that produced a node. Confirm before relying on it.
 */
@Deprecated
public enum LayoutEngine {
ALGORITHM,
AI,
OUTLINE
}

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.data.old;
import java.io.Serializable;
import java.util.Locale;
/**
 * Legacy node type enumeration kept in the {@code old} package.
 */
@Deprecated
public enum NodeType implements Serializable {
    DOCUMENT,
    SECTION,
    SUPER_SECTION,
    HEADLINE,
    PARAGRAPH,
    TABLE,
    TABLE_CELL,
    IMAGE,
    HEADER,
    FOOTER;


    /**
     * Returns the constant name with its first character kept as declared and the
     * remainder lower-cased, e.g. {@code SUPER_SECTION} becomes {@code Super_section}.
     */
    @Override
    public String toString() {

        String constantName = name();
        String head = constantName.substring(0, 1);
        String tail = constantName.substring(1).toLowerCase(Locale.ROOT);
        return head + tail;
    }
}

View File

@ -0,0 +1,199 @@
package com.iqser.red.service.redaction.v1.server.mapper;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.AllDocumentPages;
import com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.DocumentPage;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData.Position;
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureProto.DocumentStructure;
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureWrapper;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.DocumentTextData;
import com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
import com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto;
import com.iqser.red.service.redaction.v1.server.data.NodeTypeProto;
import com.iqser.red.service.redaction.v1.server.data.RangeProto;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.experimental.UtilityClass;
@UtilityClass
public class DocumentDataMapper {
public DocumentData toDocumentData(Document document) {
List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
.stream())
.distinct()
.map(DocumentDataMapper::toAtomicTextBlockData)
.toList();
AllDocumentTextData allDocumentTextData = AllDocumentTextData.newBuilder().addAllDocumentTextData(documentTextData).build();
List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
.stream())
.distinct()
.map(DocumentDataMapper::toAtomicPositionBlockData)
.toList();
AllDocumentPositionData allDocumentPositionData = AllDocumentPositionData.newBuilder().addAllDocumentPositionData(atomicPositionBlockData).build();
List<DocumentPage> documentPageData = document.getPages()
.stream()
.sorted(Comparator.comparingInt(Page::getNumber))
.map(DocumentDataMapper::toPageData)
.toList();
AllDocumentPages allDocumentPages = AllDocumentPages.newBuilder().addAllDocumentPages(documentPageData).build();
DocumentStructureWrapper tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
return DocumentData.builder()
.documentTextData(allDocumentTextData)
.documentPositionData(allDocumentPositionData)
.documentPages(allDocumentPages)
.documentStructureWrapper(tableOfContentsData)
.build();
}
private DocumentStructureWrapper toDocumentTreeData(DocumentTree documentTree) {
return new DocumentStructureWrapper(DocumentStructure.newBuilder().setRoot(toEntryData(documentTree.getRoot())).build());
}
private EntryData toEntryData(DocumentTree.Entry entry) {
List<Long> atomicTextBlocks;
if (entry.getNode().isLeaf()) {
atomicTextBlocks = toAtomicTextBlockIds(entry.getNode().getLeafTextBlock());
} else {
atomicTextBlocks = new ArrayList<>();
}
Map<String, String> properties = switch (entry.getType()) {
case TABLE -> PropertiesMapper.buildTableProperties((Table) entry.getNode());
case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCell) entry.getNode());
case IMAGE -> PropertiesMapper.buildImageProperties((Image) entry.getNode());
case PARAGRAPH ->
entry.getNode() instanceof DuplicatedParagraph duplicatedParagraph ? PropertiesMapper.buildDuplicateParagraphProperties(duplicatedParagraph) : new HashMap<>();
default -> new HashMap<>();
};
var documentBuilder = EntryData.newBuilder()
.addAllTreeId(entry.getTreeId())
.addAllChildren(entry.getChildren()
.stream()
.map(DocumentDataMapper::toEntryData)
.toList())
.setType(resolveType(entry.getType()))
.addAllAtomicBlockIds(atomicTextBlocks)
.addAllPageNumbers(entry.getNode().getPages()
.stream()
.map(Page::getNumber)
.map(Integer::longValue)
.toList())
.putAllProperties(properties);
if (entry.getNode() != null) {
documentBuilder.addAllEngines(entry.getNode().getEngines()
.stream()
.map(engine -> LayoutEngineProto.LayoutEngine.valueOf(engine.name()))
.toList());
} else {
documentBuilder.addAllEngines(new HashSet<>(Set.of(LayoutEngineProto.LayoutEngine.ALGORITHM)));
}
return documentBuilder.build();
}
private static NodeTypeProto.NodeType resolveType(NodeType type) {
return NodeTypeProto.NodeType.valueOf(type.name());
}
private List<Long> toAtomicTextBlockIds(TextBlock textBlock) {
return textBlock.getAtomicTextBlocks()
.stream()
.map(AtomicTextBlock::getId)
.toList();
}
private DocumentPage toPageData(Page p) {
return DocumentPage.newBuilder().setRotation(p.getRotation()).setHeight(p.getHeight()).setWidth(p.getWidth()).setNumber(p.getNumber()).build();
}
private DocumentTextData toAtomicTextBlockData(AtomicTextBlock atomicTextBlock) {
return DocumentTextData.newBuilder()
.setId(atomicTextBlock.getId())
.setPage(atomicTextBlock.getPage().getNumber().longValue())
.setSearchText(atomicTextBlock.getSearchText())
.setNumberOnPage(atomicTextBlock.getNumberOnPage())
.setStart(atomicTextBlock.getTextRange().start())
.setEnd(atomicTextBlock.getTextRange().end())
.addAllLineBreaks(atomicTextBlock.getLineBreaks())
.addAllItalicTextRanges(atomicTextBlock.getItalicTextRanges()
.stream()
.map(r -> RangeProto.Range.newBuilder().setStart(r.start()).setEnd(r.end()).build())
.toList())
.addAllBoldTextRanges(atomicTextBlock.getBoldTextRanges()
.stream()
.map(r -> RangeProto.Range.newBuilder().setStart(r.start()).setEnd(r.end()).build())
.toList())
.build();
}
private DocumentPositionData toAtomicPositionBlockData(AtomicTextBlock atomicTextBlock) {
return DocumentPositionData.newBuilder()
.setId(atomicTextBlock.getId())
.addAllPositions(toPositions(atomicTextBlock.getPositions()))
.addAllStringIdxToPositionIdx(atomicTextBlock.getStringIdxToPositionIdx())
.build();
}
private static List<Position> toPositions(List<Rectangle2D> rects) {
List<Position> positions = new ArrayList<>();
for (Rectangle2D rect : rects) {
positions.add(toPosition(rect));
}
return positions;
}
/**
 * Encodes a rectangle as a protobuf {@link Position} holding four float values in the order
 * min-x, min-y, width, height.
 *
 * @param rect the rectangle to encode
 * @return the built position message
 */
private static Position toPosition(Rectangle2D rect) {
    float minX = (float) rect.getMinX();
    float minY = (float) rect.getMinY();
    float width = (float) rect.getWidth();
    float height = (float) rect.getHeight();
    return Position.newBuilder()
            .addValue(minX)
            .addValue(minY)
            .addValue(width)
            .addValue(height)
            .build();
}
}

View File

@ -1,15 +1,18 @@
package com.iqser.red.service.redaction.v1.server.service.document;
package com.iqser.red.service.redaction.v1.server.mapper;
import static com.iqser.red.service.redaction.v1.server.data.DocumentPageProto.DocumentPage;
import static com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.AllDocumentPositionData;
import static com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.AllDocumentTextData;
import static com.iqser.red.service.redaction.v1.server.data.EntryDataProto.EntryData;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
@ -17,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
@ -24,13 +28,11 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
import lombok.experimental.UtilityClass;
@ -43,79 +45,89 @@ public class DocumentGraphMapper {
DocumentTree documentTree = new DocumentTree(document);
Context context = new Context(documentData, documentTree);
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages())
.map(DocumentGraphMapper::buildPage)
.toList());
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildrenList(), context));
document.setDocumentTree(context.documentTree);
document.setPages(new HashSet<>(context.pageData));
document.setNumberOfPages(documentData.getDocumentPages().length);
document.setNumberOfPages(documentData.getDocumentPages().getDocumentPagesCount());
document.setTextBlock(document.getTextBlock());
return document;
}
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
private List<DocumentTree.Entry> buildEntries(List<EntryData> entries, Context context) {
List<DocumentTree.Entry> newEntries = new ArrayList<>(entries.size());
for (DocumentStructure.EntryData entryData : entries) {
for (EntryData entryData : entries) {
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
.map(pageNumber -> getPage(pageNumber, context))
List<Page> pages = entryData.getPageNumbersList()
.stream()
.map(context::getPage)
.toList();
SemanticNode node = switch (entryData.getType()) {
case SECTION -> buildSection(context);
case SUPER_SECTION -> buildSuperSection(context);
case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
case PARAGRAPH -> buildParagraph(context, entryData.getPropertiesMap());
case HEADLINE -> buildHeadline(context);
case HEADER -> buildHeader(context);
case FOOTER -> buildFooter(context);
case TABLE -> buildTable(context, entryData.getProperties());
case TABLE_CELL -> buildTableCell(context, entryData.getProperties());
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbers());
case TABLE -> buildTable(context, entryData.getPropertiesMap());
case TABLE_CELL -> buildTableCell(context, entryData.getPropertiesMap());
case IMAGE -> buildImage(context, entryData.getPropertiesMap(), entryData.getPageNumbersList());
case TABLE_OF_CONTENTS -> buildTableOfContents(context);
case TABLE_OF_CONTENTS_ITEM -> buildTableOfContentsItem(context);
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
};
if (entryData.getAtomicBlockIds().length > 0) {
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
if (entryData.getAtomicBlockIdsCount() > 0) {
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIdsList(), context, node);
node.setLeafTextBlock(textBlock);
switch (entryData.getType()) {
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
default -> textBlock.getAtomicTextBlocks()
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
}
}
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
.toList();
if (entryData.getEngines() != null) {
entryData.getEngines()
.forEach(node::addEngine);
} else {
entryData.setEngines(Collections.emptySet());
}
List<Integer> treeId = entryData.getTreeIdList();
node.setTreeId(treeId);
switch (entryData.getType()) {
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
default -> pages.forEach(page -> page.getMainBody().add(node));
}
entryData.getEnginesList()
.stream()
.map(engine -> LayoutEngine.valueOf(engine.name()))
.forEach(node::addEngine);
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildrenList(), context)).node(node).build());
}
return newEntries;
}
/**
 * Creates an empty {@link TableOfContents} node attached to the context's document tree.
 */
private static TableOfContents buildTableOfContents(Context context) {
    var tree = context.documentTree;
    return TableOfContents.builder().documentTree(tree).build();
}
/**
 * Creates an empty {@link TableOfContentsItem} node attached to the context's document tree.
 */
private static TableOfContentsItem buildTableOfContentsItem(Context context) {
    var tree = context.documentTree;
    return TableOfContentsItem.builder().documentTree(tree).build();
}
/**
 * Creates an empty {@link Headline} node attached to the context's document tree.
 */
private Headline buildHeadline(Context context) {
    var tree = context.documentTree;
    return Headline.builder().documentTree(tree).build();
}
private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
private Image buildImage(Context context, Map<String, String> properties, List<Long> pageNumbers) {
assert pageNumbers.length == 1;
Page page = getPage(pageNumbers[0], context);
assert pageNumbers.size() == 1;
Page page = context.getPage(pageNumbers.get(0));
var builder = Image.builder();
PropertiesMapper.parseImageProperties(properties, builder);
return builder.documentTree(context.documentTree).page(page).build();
@ -161,13 +173,14 @@ public class DocumentGraphMapper {
return SuperSection.builder().documentTree(context.documentTree).build();
}
private Paragraph buildParagraph(Context context, Map<String, String> properties) {
if (PropertiesMapper.isDuplicateParagraph(properties)) {
DuplicatedParagraph duplicatedParagraph = DuplicatedParagraph.builder().documentTree(context.documentTree).build();
Long[] unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
var unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
duplicatedParagraph.setUnsortedLeafTextBlock(toTextBlock(unsortedTextblockIds, context, duplicatedParagraph));
return duplicatedParagraph;
}
@ -176,9 +189,9 @@ public class DocumentGraphMapper {
}
private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) {
private TextBlock toTextBlock(List<Long> atomicTextBlockIds, Context context, SemanticNode parent) {
return Arrays.stream(atomicTextBlockIds)
return atomicTextBlockIds.stream()
.map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId))
.collect(new TextBlockCollector());
}
@ -186,24 +199,16 @@ public class DocumentGraphMapper {
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.getDocumentPositionData(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
context.getPage(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)).getPage()));
}
private Page buildPage(DocumentPage p) {
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
}
private Page getPage(Long pageIndex, Context context) {
Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1);
assert page.getNumber() == Math.toIntExact(pageIndex);
return page;
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).textBlocksOnPage(new LinkedList<>()).build();
}
@ -211,21 +216,33 @@ public class DocumentGraphMapper {
private final DocumentTree documentTree;
private final List<Page> pageData;
private final List<DocumentTextData> documentTextData;
private final List<DocumentPositionData> documentPositionData;
private final AllDocumentTextData documentTextData;
private final AllDocumentPositionData documentPositionData;
Context(DocumentData documentData, DocumentTree documentTree) {
this.documentTree = documentTree;
this.pageData = new ArrayList<>();
this.documentTextData = Arrays.stream(documentData.getDocumentTextData())
.toList();
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData())
this.pageData = documentData.getDocumentPages().getDocumentPagesList()
.stream()
.map(DocumentGraphMapper::buildPage)
.sorted(Comparator.comparingInt(Page::getNumber))
.toList();
this.documentTextData = documentData.getDocumentTextData();
this.documentPositionData = documentData.getDocumentPositionData();
}
/**
 * Looks up a page by its 1-based page number.
 * <p>
 * The page is read from position {@code pageIndex - 1} of {@code pageData}; the match between
 * list position and page number is only verified by an {@code assert}, i.e. when assertions are enabled.
 *
 * @param pageIndex the 1-based page number
 * @return the page stored at list position {@code pageIndex - 1}
 */
public Page getPage(Long pageIndex) {
    int pageNumber = Math.toIntExact(pageIndex);
    Page page = pageData.get(pageNumber - 1);
    assert page.getNumber() == pageNumber;
    return page;
}
}
}

View File

@ -0,0 +1,152 @@
package com.iqser.red.service.redaction.v1.server.mapper;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import com.iqser.red.service.redaction.v1.server.data.DocumentStructureWrapper;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.DuplicatedParagraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.experimental.UtilityClass;
@UtilityClass
public class PropertiesMapper {
/**
 * Serializes the image type, transparency flag, position, id and representation hash of an
 * image into a string property map.
 *
 * @param image the image to serialize
 * @return a new mutable map keyed by the {@code DocumentStructureWrapper.ImageProperties} constants
 */
public static Map<String, String> buildImageProperties(Image image) {
    var imageProps = new HashMap<String, String>();
    imageProps.put(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE, image.getImageType().name());
    imageProps.put(DocumentStructureWrapper.ImageProperties.TRANSPARENT, String.valueOf(image.isTransparent()));
    imageProps.put(DocumentStructureWrapper.ImageProperties.POSITION, toString(image.getPosition()));
    imageProps.put(DocumentStructureWrapper.ImageProperties.ID, image.getId());
    imageProps.put(DocumentStructureWrapper.ImageProperties.REPRESENTATION_HASH, image.getRepresentationHash());
    return imageProps;
}
/**
 * Serializes row, column, header flag and bounding box of a table cell into a string property map.
 *
 * @param tableCell the table cell to serialize
 * @return a new mutable map keyed by the {@code DocumentStructureWrapper.TableCellProperties} constants
 * @throws IllegalArgumentException if the cell has more than one page or more than one bounding box
 */
public static Map<String, String> buildTableCellProperties(TableCell tableCell) {
    var cellProps = new HashMap<String, String>();
    cellProps.put(DocumentStructureWrapper.TableCellProperties.ROW, String.valueOf(tableCell.getRow()));
    cellProps.put(DocumentStructureWrapper.TableCellProperties.COL, String.valueOf(tableCell.getCol()));
    cellProps.put(DocumentStructureWrapper.TableCellProperties.HEADER, String.valueOf(tableCell.isHeader()));

    boolean onSeveralPages = tableCell.getPages().size() > 1 || tableCell.getBBox().keySet().size() > 1;
    if (onSeveralPages) {
        throw new IllegalArgumentException("TableCell can only occur on a single page!");
    }

    // The bounding box is looked up by the cell's only page.
    var boxesByPage = tableCell.getBBox();
    var onlyPage = tableCell.getPages()
            .stream()
            .findFirst()
            .orElseThrow();
    cellProps.put(DocumentStructureWrapper.TableCellProperties.B_BOX, toString(boxesByPage.get(onlyPage)));
    return cellProps;
}
/**
 * Serializes the row and column count of a table into a string property map.
 *
 * @param table the table to serialize
 * @return a new mutable map keyed by the {@code DocumentStructureWrapper.TableProperties} constants
 */
public static Map<String, String> buildTableProperties(Table table) {
    var tableProps = new HashMap<String, String>();
    String rowCount = String.valueOf(table.getNumberOfRows());
    tableProps.put(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS, rowCount);
    String colCount = String.valueOf(table.getNumberOfCols());
    tableProps.put(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS, colCount);
    return tableProps;
}
/**
 * Reads image type, transparency flag, position and id from the property map and applies them to the builder.
 *
 * @param properties the serialized image properties
 * @param builder    the image builder to populate
 */
public static void parseImageProperties(Map<String, String> properties, Image.ImageBuilder<?, ?> builder) {
    // NOTE(review): REPRESENTATION_HASH is written by buildImageProperties but not read back here - confirm this is intended.
    String rawImageType = properties.get(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE);
    builder.imageType(parseImageType(rawImageType));

    String rawTransparent = properties.get(DocumentStructureWrapper.ImageProperties.TRANSPARENT);
    builder.transparent(Boolean.parseBoolean(rawTransparent));

    String rawPosition = properties.get(DocumentStructureWrapper.ImageProperties.POSITION);
    builder.position(DocumentStructureWrapper.parseRectangle2D(rawPosition));

    String imageId = properties.get(DocumentStructureWrapper.ImageProperties.ID);
    builder.id(imageId);
}
/**
 * Reads row, column, header flag and bounding box from the property map and applies them to the builder.
 *
 * @param properties the serialized table cell properties
 * @param builder    the table cell builder to populate
 * @throws NumberFormatException if the row or column value is not a parsable integer
 */
public static void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder<?, ?> builder) {
    String rawRow = properties.get(DocumentStructureWrapper.TableCellProperties.ROW);
    builder.row(Integer.parseInt(rawRow));

    String rawCol = properties.get(DocumentStructureWrapper.TableCellProperties.COL);
    builder.col(Integer.parseInt(rawCol));

    String rawHeader = properties.get(DocumentStructureWrapper.TableCellProperties.HEADER);
    builder.header(Boolean.parseBoolean(rawHeader));

    String rawBox = properties.get(DocumentStructureWrapper.TableCellProperties.B_BOX);
    builder.bBox(DocumentStructureWrapper.parseRectangle2D(rawBox));
}
/**
 * Reads the row and column count from the property map and applies them to the builder.
 *
 * @param properties the serialized table properties
 * @param builder    the table builder to populate
 * @throws NumberFormatException if a count is not a parsable integer
 */
public static void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {
    String rawRowCount = properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS);
    builder.numberOfRows(Integer.parseInt(rawRowCount));

    String rawColCount = properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS);
    builder.numberOfCols(Integer.parseInt(rawColCount));
}
/**
 * Serializes the ids of the unsorted leaf text block of a duplicated paragraph.
 * <p>
 * The ids are stored in {@link Arrays#toString(Object[])} format under the
 * {@code UNSORTED_TEXTBLOCK_ID} key.
 *
 * @param duplicatedParagraph the duplicated paragraph to serialize
 * @return a new mutable map holding the single id-list property
 */
public static Map<String, String> buildDuplicateParagraphProperties(DuplicatedParagraph duplicatedParagraph) {
    Long[] unsortedIds = toAtomicTextBlockIds(duplicatedParagraph.getUnsortedLeafTextBlock());
    var paragraphProps = new HashMap<String, String>();
    paragraphProps.put(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID, Arrays.toString(unsortedIds));
    return paragraphProps;
}
/**
 * Tells whether the property map carries the {@code UNSORTED_TEXTBLOCK_ID} key.
 *
 * @param properties the serialized paragraph properties
 * @return {@code true} if the key is present
 */
public static boolean isDuplicateParagraph(Map<String, String> properties) {
    String markerKey = DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID;
    return properties.containsKey(markerKey);
}
/**
 * Parses the id list stored under the {@code UNSORTED_TEXTBLOCK_ID} key.
 *
 * @param properties the serialized paragraph properties
 * @return the parsed ids, as produced by {@code toLongList}
 */
public static List<Long> getUnsortedTextblockIds(Map<String, String> properties) {
    String serializedIds = properties.get(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
    return toLongList(serializedIds);
}
/**
 * Parses a list of ids in {@link Arrays#toString(Object[])} format, e.g. {@code "[1, 2, 3]"}.
 * <p>
 * The first and last character (the brackets) are dropped and the remainder is split on commas.
 * Each token is trimmed before parsing because {@code Arrays.toString} separates elements with
 * {@code ", "} and {@link Long#valueOf(String)} rejects surrounding whitespace. Empty tokens are
 * skipped, so {@code "[]"} yields an empty list.
 *
 * @param ids the serialized id list including the surrounding brackets
 * @return an unmodifiable list of the parsed ids, in input order
 * @throws NumberFormatException if a non-empty token is not a parsable long
 */
public static List<Long> toLongList(String ids) {
    String withoutBrackets = ids.substring(1, ids.length() - 1);
    return Arrays.stream(withoutBrackets.split(","))
            .map(String::trim)
            .filter(token -> !token.isEmpty())
            .map(Long::valueOf)
            .toList();
}
/**
 * Resolves an image type name case-insensitively.
 * <p>
 * Unknown names fall back to {@link ImageType#OTHER}. A missing value ({@code null}, e.g. when the
 * property is absent from the map) is treated the same way instead of failing with a
 * {@link NullPointerException}.
 *
 * @param imageType the serialized image type name, may be {@code null}
 * @return the matching {@link ImageType}, or {@link ImageType#OTHER} if absent or unknown
 */
private static ImageType parseImageType(String imageType) {
    if (imageType == null) {
        return ImageType.OTHER;
    }
    try {
        return ImageType.valueOf(imageType.toUpperCase(Locale.ROOT));
    } catch (IllegalArgumentException e) {
        // Unknown type names are mapped to OTHER on purpose.
        return ImageType.OTHER;
    }
}
/**
 * Serializes a rectangle as x, y, width and height, each formatted with {@code %f} in
 * {@link Locale#US} and joined by {@code DocumentStructureWrapper.RECTANGLE_DELIMITER}.
 *
 * @param rectangle2D the rectangle to serialize
 * @return the delimited string representation
 */
public static String toString(Rectangle2D rectangle2D) {
    var delimiter = DocumentStructureWrapper.RECTANGLE_DELIMITER;
    double x = rectangle2D.getX();
    double y = rectangle2D.getY();
    double width = rectangle2D.getWidth();
    double height = rectangle2D.getHeight();
    return String.format(Locale.US, "%f%s%f%s%f%s%f", x, delimiter, y, delimiter, width, delimiter, height);
}
/**
 * Collects the ids of all atomic text blocks of the given text block into an array, in encounter order.
 *
 * @param textBlock the text block whose atomic parts are inspected
 * @return the ids as a {@code Long[]}
 */
private static Long[] toAtomicTextBlockIds(TextBlock textBlock) {
    var atomicBlocks = textBlock.getAtomicTextBlocks();
    return atomicBlocks.stream()
            .map(atomicBlock -> atomicBlock.getId())
            .toArray(size -> new Long[size]);
}
}

View File

@ -0,0 +1,116 @@
package com.iqser.red.service.redaction.v1.server.model.document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
/**
 * Skeleton {@link NodeVisitor} that walks the whole node tree.
 * <p>
 * Every typed {@code visit} method funnels into {@link #defaultVisit(SemanticNode)}, which first
 * calls {@link #visitNodeDefault(SemanticNode)} for the node itself and then lets each child
 * accept this visitor. Subclasses override {@code visitNodeDefault} for uniform handling, or a
 * typed {@code visit} method for type-specific handling.
 */
public abstract class AbstractNodeVisitor implements NodeVisitor {

    @Override
    public void visit(Document node) {
        defaultVisit(node);
    }

    @Override
    public void visit(SuperSection node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Section node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Headline node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Paragraph node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Footer node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Header node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Image node) {
        defaultVisit(node);
    }

    @Override
    public void visit(Table node) {
        defaultVisit(node);
    }

    @Override
    public void visit(TableCell node) {
        defaultVisit(node);
    }

    @Override
    public void visit(TableOfContents node) {
        defaultVisit(node);
    }

    @Override
    public void visit(TableOfContentsItem node) {
        defaultVisit(node);
    }

    /**
     * Hook invoked once for every visited node before its children are traversed.
     *
     * @param node the node currently being visited
     */
    public void visitNodeDefault(SemanticNode node) {
        // By default, it does nothing
    }

    /**
     * Handles the node via {@link #visitNodeDefault(SemanticNode)} and then descends into its children.
     *
     * @param semanticNode the node to process
     */
    protected void defaultVisit(SemanticNode semanticNode) {
        visitNodeDefault(semanticNode);
        var children = semanticNode.streamChildren();
        children.forEach(child -> child.accept(this));
    }
}

View File

@ -9,6 +9,8 @@ import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
@ -17,6 +19,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.utils.EntityCreationUtility;
import com.iqser.red.service.redaction.v1.server.utils.EntityEnrichmentService;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -35,7 +39,7 @@ public class DocumentTree {
public DocumentTree(Document document) {
root = Entry.builder().treeId(Collections.emptyList()).children(new LinkedList<>()).node(document).build();
this.root = Entry.builder().treeId(Collections.emptyList()).children(new LinkedList<>()).node(document).build();
}
@ -288,14 +292,30 @@ public class DocumentTree {
if (treeId.isEmpty()) {
return root;
}
Entry entry = root.children.get(treeId.get(0));
for (int id : treeId.subList(1, treeId.size())) {
Entry entry = root;
for (int id : treeId) {
entry = entry.children.get(id);
}
return entry;
}
/**
 * Resolves a tree id to its entry without throwing on out-of-range child indices.
 * <p>
 * Starting at the root, each element of {@code treeId} selects a child by index. An empty id
 * resolves to the root itself.
 *
 * @param treeId the child indices leading from the root to the wanted entry
 * @return the entry, or an empty {@link Optional} if any index is negative or beyond the child list
 */
public Optional<Entry> findEntryById(List<Integer> treeId) {
    Entry current = root;
    for (int childIndex : treeId) {
        boolean inRange = childIndex >= 0 && childIndex < current.children.size();
        if (!inRange) {
            return Optional.empty();
        }
        current = current.children.get(childIndex);
    }
    return Optional.of(current);
}
public Stream<Entry> mainEntries() {
return root.children.stream();
@ -342,6 +362,25 @@ public class DocumentTree {
}
/**
 * Inserts an entity into the document graph.
 * <p>
 * The entity is offered to the root node for intersection, enriched with the text block of its
 * deepest fully containing node, and added to its pages and node entity sets. Unless the entity
 * is {@code TEMPORARY}, its relations are computed and insert listeners are notified afterwards.
 *
 * @param entity the entity to insert
 */
public void addEntityToGraph(TextEntity entity) {
    getRoot().getNode().addThisToEntityIfIntersects(entity);

    TextBlock containingTextBlock = entity.getDeepestFullyContainingNode().getTextBlock();
    EntityEnrichmentService.enrichEntity(entity, containingTextBlock);

    EntityCreationUtility.addToPages(entity);
    EntityCreationUtility.addEntityToNodeEntitySets(entity);

    // Temporary entities take no part in relations and do not trigger insert events.
    boolean temporary = entity.getEntityType().equals(EntityType.TEMPORARY);
    if (!temporary) {
        entity.computeRelations();
        entity.notifyEntityInserted();
    }
}
@Builder
@Getter
@AllArgsConstructor

View File

@ -0,0 +1,32 @@
package com.iqser.red.service.redaction.v1.server.model.document;
import java.util.HashSet;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import lombok.Getter;
/**
 * Visitor that gathers every visited node whose text range intersects a given {@link TextRange}.
 */
public class IntersectingNodeVisitor extends AbstractNodeVisitor {

    /** Nodes found so far; filled while the visitor walks the tree. */
    @Getter
    private final Set<SemanticNode> intersectingNodes = new HashSet<>();

    /** The range every visited node is tested against. */
    private final TextRange textRange;


    /**
     * @param textRange the range to test visited nodes against
     */
    public IntersectingNodeVisitor(TextRange textRange) {
        this.textRange = textRange;
    }


    /**
     * Records the node if its text range intersects the configured range.
     *
     * @param node the node currently being visited
     */
    @Override
    public void visitNodeDefault(SemanticNode node) {
        boolean intersects = textRange.intersects(node.getTextRange());
        if (intersects) {
            intersectingNodes.add(node);
        }
    }
}

View File

@ -0,0 +1,53 @@
package com.iqser.red.service.redaction.v1.server.model.document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContents;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableOfContentsItem;
/**
 * Visitor over the document node types, with one {@code visit} overload per concrete node type.
 */
public interface NodeVisitor {
/** Visits a {@link Document} node. */
void visit(Document document);
/** Visits a {@link SuperSection} node. */
void visit(SuperSection superSection);
/** Visits a {@link Section} node. */
void visit(Section section);
/** Visits a {@link Headline} node. */
void visit(Headline headline);
/** Visits a {@link Paragraph} node. */
void visit(Paragraph paragraph);
/** Visits a {@link Footer} node. */
void visit(Footer footer);
/** Visits a {@link Header} node. */
void visit(Header header);
/** Visits an {@link Image} node. */
void visit(Image image);
/** Visits a {@link Table} node. */
void visit(Table table);
/** Visits a {@link TableCell} node. */
void visit(TableCell tableCell);
/** Visits a {@link TableOfContents} node. */
void visit(TableOfContents tableOfContents);
/** Visits a {@link TableOfContentsItem} node. */
void visit(TableOfContentsItem tableOfContentsItem);
}

View File

@ -134,6 +134,12 @@ public class TextRange implements Comparable<TextRange> {
}
/**
 * Checks whether the index lies in the half-open interval {@code [start, end)}.
 *
 * @param index the index to test
 * @return {@code true} if {@code start <= index < end}
 */
public boolean containsExclusive(int index) {
    boolean atOrAfterStart = index >= start;
    return atOrAfterStart && index < end;
}
/**
* Checks if this {@link TextRange} intersects with another {@link TextRange}.
*

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
/**
 * Base class for relations between two {@link TextEntity} instances, referred to as {@code a} and {@code b}.
 */
@Getter
@RequiredArgsConstructor
public abstract class AbstractRelation implements Relation {

    /** First participant of the relation. */
    protected final TextEntity a;

    /** Second participant of the relation. */
    protected final TextEntity b;


    /**
     * @return the simple class name followed by both participants, e.g. {@code Intersection{a=..., b=...}}
     */
    @Override
    public String toString() {
        String relationName = this.getClass().getSimpleName();
        return relationName + "{a=" + a + ", b=" + b + "}";
    }
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
/**
 * An {@link Intersection} with directed roles: the first entity is the container, the second the contained one.
 */
public class Containment extends Intersection {
/**
 * @param container the containing entity, passed to the superclass as its first entity
 * @param contained the contained entity, passed to the superclass as its second entity
 */
public Containment(TextEntity container, TextEntity contained) {
super(container, contained);
}
/** @return the containing entity (field {@code a}) */
public TextEntity getContainer() {
return a;
}
/** @return the contained entity (field {@code b}) */
public TextEntity getContained() {
return b;
}
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
/**
 * Callback interface for entity lifecycle events: insertion, update and removal.
 */
public interface EntityEventListener {
/**
* Invoked when an entity is inserted.
*
* @param entity The entity that was inserted.
*/
void onEntityInserted(IEntity entity);
/**
* Invoked when an entity is updated.
*
* @param entity The entity that was updated.
*/
void onEntityUpdated(IEntity entity);
/**
* Invoked when an entity is removed.
*
* @param entity The entity that was removed.
*/
void onEntityRemoved(IEntity entity);
}

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
/**
 * Relation between two entities, modelled as a special case of {@link Containment}.
 * NOTE(review): presumably used when both entities cover the same text range - confirm against the code creating it.
 */
public class Equality extends Containment {
/**
 * @param a the first entity, passed to {@link Containment} as the container
 * @param b the second entity, passed to {@link Containment} as the contained entity
 */
public Equality(TextEntity a, TextEntity b) {
super(a, b);
}
}

View File

@ -6,7 +6,6 @@ import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
import lombok.NonNull;
@ -52,6 +51,17 @@ public interface IEntity {
String type();
/**
* An entity is valid when it is active and not a false recommendation, a false positive or a dictionary removal.
* The default implementation only delegates to {@code active()}.
*
* @return true if the entity is valid, false otherwise.
*/
default boolean valid() {
return active();
}
/**
* Calculates the length of the entity's value.
*
@ -85,6 +95,9 @@ public interface IEntity {
// Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
default boolean applied() {

    // A rule that outranks manual changes decides alone; otherwise the manual value wins,
    // with the rule's value as fallback when no manual value is present.
    return this.getMatchedRule().isHigherPriorityThanManual()
            ? getMatchedRule().isApplied()
            : getManualOverwrite().getApplied().orElse(getMatchedRule().isApplied());
}
@ -108,6 +121,10 @@ public interface IEntity {
*/
default boolean ignored() {

    // A rule that outranks manual changes decides alone; otherwise the manual value wins,
    // with the rule's value as fallback when no manual value is present.
    return this.getMatchedRule().isHigherPriorityThanManual()
            ? getMatchedRule().isIgnored()
            : getManualOverwrite().getIgnored().orElse(getMatchedRule().isIgnored());
}
@ -120,6 +137,9 @@ public interface IEntity {
*/
default boolean removed() {

    // A rule that outranks manual changes decides alone; otherwise the manual value wins,
    // with the rule's value as fallback when no manual value is present.
    return this.getMatchedRule().isHigherPriorityThanManual()
            ? getMatchedRule().isRemoved()
            : getManualOverwrite().getRemoved().orElse(getMatchedRule().isRemoved());
}
@ -132,6 +152,9 @@ public interface IEntity {
*/
default boolean resized() {

    // When the matched rule outranks manual changes, the manual resize is not in effect.
    // This branch used to return getMatchedRule().isRemoved(), which reported a removed
    // high-priority entity as "resized"; it now returns the same default as the fallback below.
    // NOTE(review): assumes a rule never marks an entity as resized by itself - confirm.
    if (this.getMatchedRule().isHigherPriorityThanManual()) {
        return false;
    }
    return getManualOverwrite().getResized()
            .orElse(false);
}
@ -316,7 +339,9 @@ public interface IEntity {
*/
default void addMatchedRule(MatchedRule matchedRule) {

    // Capture validity before the rule is added so that a flip can be reported to listeners.
    final boolean validBefore = valid();
    getMatchedRuleList().add(matchedRule);
    handleStateChange(validBefore);
}
@ -330,7 +355,53 @@ public interface IEntity {
if (getMatchedRuleList().equals(matchedRules)) {
return;
}
boolean wasValid = valid();
getMatchedRuleList().addAll(matchedRules);
handleStateChange(wasValid);
}
/**
 * Registers a listener for entity events.
 * NOTE(review): presumably the listener then appears in {@code getEntityEventListeners()} - implementation-defined, confirm.
 *
 * @param listener the listener to register
 */
void addEntityEventListener(EntityEventListener listener);
/**
 * Unregisters a listener for entity events.
 * NOTE(review): presumably the listener is then no longer returned by {@code getEntityEventListeners()} - implementation-defined, confirm.
 *
 * @param listener the listener to unregister
 */
void removeEntityEventListener(EntityEventListener listener);
/**
 * Calls {@code onEntityInserted(this)} on every listener returned by {@code getEntityEventListeners()}.
 */
default void notifyEntityInserted() {

    var listeners = getEntityEventListeners().iterator();
    while (listeners.hasNext()) {
        listeners.next().onEntityInserted(this);
    }
}
/**
 * Calls {@code onEntityUpdated(this)} on every listener returned by {@code getEntityEventListeners()}.
 */
default void notifyEntityUpdated() {

    var listeners = getEntityEventListeners().iterator();
    while (listeners.hasNext()) {
        listeners.next().onEntityUpdated(this);
    }
}
/**
 * Calls {@code onEntityRemoved(this)} on every listener returned by {@code getEntityEventListeners()}.
 */
default void notifyEntityRemoved() {

    var listeners = getEntityEventListeners().iterator();
    while (listeners.hasNext()) {
        listeners.next().onEntityRemoved(this);
    }
}
/**
 * Returns the listeners that the {@code notifyEntity*} default methods iterate over.
 *
 * @return the listeners of this entity
 */
Collection<EntityEventListener> getEntityEventListeners();
/**
 * Notifies listeners if the validity of this entity differs from its previous validity.
 * <p>
 * On a change, listeners receive a removal event if the entity is removed, and an update event otherwise.
 *
 * @param wasValid the result of {@code valid()} before the state was modified
 */
default void handleStateChange(boolean wasValid) {

    boolean validityChanged = valid() != wasValid;
    if (validityChanged) {
        if (removed()) {
            notifyEntityRemoved();
        } else {
            notifyEntityUpdated();
        }
    }
}
@ -364,15 +435,9 @@ public interface IEntity {
*
* @return The built reason string.
*/
default String buildReasonWithManualChangeDescriptions() {
default String buildReason() {
if (getManualOverwrite().getDescriptions().isEmpty()) {
return getMatchedRule().getReason();
}
if (getMatchedRule().getReason().isEmpty()) {
return String.join(", ", getManualOverwrite().getDescriptions());
}
return getMatchedRule().getReason() + ", " + String.join(", ", getManualOverwrite().getDescriptions());
return getMatchedRule().getReason();
}

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
/**
 * Relation between two entities {@code a} and {@code b}; adds no state or behavior to {@link AbstractRelation}.
 * NOTE(review): presumably expresses that the text ranges of both entities overlap - confirm against the code creating it.
 */
public class Intersection extends AbstractRelation {
/**
 * @param a the first entity of the relation
 * @param b the second entity of the relation
 */
public Intersection(TextEntity a, TextEntity b) {
super(a, b);
}
}

View File

@ -1,10 +1,8 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
@ -14,7 +12,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -26,18 +23,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class ManualChangeOverwrite {
private static final Map<Class<? extends BaseAnnotation>, String> MANUAL_CHANGE_DESCRIPTIONS = Map.of(//
ManualRedactionEntry.class, "created by manual change", //
ManualLegalBasisChange.class, "legal basis was manually changed", //
ManualResizeRedaction.class, "resized by manual override", //
ManualForceRedaction.class, "forced by manual override", //
IdRemoval.class, "removed by manual override", //
ManualRecategorization.class, "recategorized by manual override");
@Builder.Default
List<BaseAnnotation> manualChanges = new LinkedList<>();
boolean changed;
List<String> descriptions;
String type;
String legalBasis;
String section;
@ -63,6 +51,7 @@ public class ManualChangeOverwrite {
this.manualChanges = new LinkedList<>();
}
public ManualChangeOverwrite(EntityType entityType, String section) {
this(entityType);
@ -95,8 +84,6 @@ public class ManualChangeOverwrite {
private void updateFields(List<BaseAnnotation> sortedManualChanges) {
descriptions = new LinkedList<>();
for (BaseAnnotation manualChange : sortedManualChanges) {
// ManualRedactionEntries are created prior to rule execution in analysis service.
@ -151,8 +138,6 @@ public class ManualChangeOverwrite {
legalBasis = recategorization.getLegalBasis();
}
}
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
}
changed = false;
}
@ -245,13 +230,6 @@ public class ManualChangeOverwrite {
}
public List<String> getDescriptions() {
calculateCurrentOverride();
return descriptions == null ? Collections.emptyList() : descriptions;
}
public Optional<List<RectangleWithPage>> getPositions() {
calculateCurrentOverride();

View File

@ -5,9 +5,6 @@ import java.util.List;
import java.util.Objects;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.drools.RuleType;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
@ -28,8 +25,9 @@ public final class MatchedRule implements Comparable<MatchedRule> {
public static final RuleType FINAL_TYPE = RuleType.fromString("FINAL");
public static final RuleType ELIMINATION_RULE_TYPE = RuleType.fromString("X");
public static final RuleType IMPORTED_TYPE = RuleType.fromString("IMP");
public static final RuleType MANUAL_TYPE = RuleType.fromString("MAN");
public static final RuleType DICTIONARY_TYPE = RuleType.fromString("DICT");
private static final List<RuleType> RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE, IMPORTED_TYPE, DICTIONARY_TYPE);
private static final List<RuleType> RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE, MANUAL_TYPE, IMPORTED_TYPE, DICTIONARY_TYPE);
RuleIdentifier ruleIdentifier;
@Builder.Default
@ -57,6 +55,13 @@ public final class MatchedRule implements Comparable<MatchedRule> {
}
/**
 * Checks whether the type of this rule is ranked before {@code MANUAL_TYPE} in {@code RULE_TYPE_PRIORITIES}.
 *
 * @return true if the rule type is contained in the priority list and its index is smaller than the index of
 * {@code MANUAL_TYPE}; false if the type is not listed at all or is listed at or after {@code MANUAL_TYPE}
 */
public boolean isHigherPriorityThanManual() {

    int ownPriorityIndex = RULE_TYPE_PRIORITIES.indexOf(this.ruleIdentifier.type());
    if (ownPriorityIndex < 0) {
        // rule types without an entry in the priority list never outrank manual changes
        return false;
    }
    return ownPriorityIndex < RULE_TYPE_PRIORITIES.indexOf(MANUAL_TYPE);
}
/**
* Returns a modified instance of {@link MatchedRule} based on its applied status.
* If the rule has been applied, it returns a new {@link MatchedRule} instance that retains all properties of the original

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model;
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
/**
* A relation between two {@link TextEntity} instances, which are exposed as {@code a} and {@code b}.
* NOTE(review): this interface does not define which of the two sides owns the relation — confirm against the implementations.
*/
public interface Relation {
/** @return the first entity of this relation */
TextEntity getA();
/** @return the second entity of this relation */
TextEntity getB();
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model.drools;
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.Objects;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.model.drools;
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.regex.Pattern;

View File

@ -4,6 +4,7 @@ import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@ -11,7 +12,10 @@ import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -24,6 +28,10 @@ import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
/**
* Represents a text entity within a document, characterized by its text range, type, entity type,
* and associated metadata like matched rules, pages, and engines.
*/
@Data
@Builder
@AllArgsConstructor
@ -39,13 +47,14 @@ public class TextEntity implements IEntity {
TextRange textRange;
@Builder.Default
List<TextRange> duplicateTextRanges = new ArrayList<>();
Set<TextRange> duplicateTextRanges = new HashSet<>();
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
final EntityType entityType;
@Builder.Default
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
final ManualChangeOverwrite manualOverwrite;
@Builder.Default
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
boolean dictionaryEntry;
boolean dossierDictionaryEntry;
@ -64,6 +73,12 @@ public class TextEntity implements IEntity {
List<SemanticNode> intersectingNodes = new LinkedList<>();
SemanticNode deepestFullyContainingNode;
@Builder.Default
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
@Builder.Default
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
@ -154,12 +169,15 @@ public class TextEntity implements IEntity {
public void removeFromGraph() {
remove("FINAL.0.0", "removed completely");
intersectingNodes.forEach(node -> node.getEntities().remove(this));
pages.forEach(page -> page.getEntities().remove(this));
intersectingNodes = new LinkedList<>();
relations.keySet()
.forEach(entity -> entity.getRelations().remove(this));
relations = new HashedMap<>();
deepestFullyContainingNode = null;
pages = new HashSet<>();
remove("FINAL.0.0", "removed completely");
}
@ -197,22 +215,20 @@ public class TextEntity implements IEntity {
return textEntity.contains(this);
}
public boolean contains(TextEntity textEntity) {
if (this.textRange.contains(textEntity.getTextRange())) {
return true;
}
List<TextRange> textEntityDuplicateRanges = textEntity.getDuplicateTextRanges();
// use optimized indexed loops for extra performance boost
for (int i = 0, duplicateTextRangesSize = duplicateTextRanges.size(); i < duplicateTextRangesSize; i++) {
TextRange duplicateTextRange = duplicateTextRanges.get(i);
Set<TextRange> textEntityDuplicateRanges = textEntity.getDuplicateTextRanges();
for (TextRange duplicateTextRange : this.duplicateTextRanges) {
if (duplicateTextRange.contains(textEntity.getTextRange())) {
return true;
}
for (int j = 0, textEntityDuplicateRangesSize = textEntityDuplicateRanges.size(); j < textEntityDuplicateRangesSize; j++) {
TextRange otherRange = textEntityDuplicateRanges.get(j);
for (TextRange otherRange : textEntityDuplicateRanges) {
if (duplicateTextRange.contains(otherRange)) {
return true;
}
@ -223,6 +239,7 @@ public class TextEntity implements IEntity {
}
public boolean intersects(TextEntity textEntity) {
return this.textRange.intersects(textEntity.getTextRange()) //
@ -247,6 +264,20 @@ public class TextEntity implements IEntity {
}
/**
* Adds a single manual change to the manual overwrite of this entity and then calls {@code notifyEntityUpdated()}.
*
* @param manualChange the manual change to add
*/
public void addManualChange(BaseAnnotation manualChange) {
manualOverwrite.addChange(manualChange);
notifyEntityUpdated();
}
/**
* Adds several manual changes to the manual overwrite of this entity and then calls {@code notifyEntityUpdated()} once.
*
* @param manualChanges the manual changes to add
*/
public void addManualChanges(List<BaseAnnotation> manualChanges) {
manualOverwrite.addChanges(manualChanges);
notifyEntityUpdated();
}
public boolean matchesAnnotationId(String manualRedactionId) {
return getPositionsOnPagePerPage().stream()
@ -285,6 +316,21 @@ public class TextEntity implements IEntity {
}
/**
 * Tells whether the entity type of this entity is one of the two valid types.
 *
 * @return true for EntityType.ENTITY and EntityType.HINT, false for every other entity type
 */
public boolean validEntityType() {

    if (entityType.equals(EntityType.ENTITY)) {
        return true;
    }
    return entityType.equals(EntityType.HINT);
}
/**
 * Combines the two checks {@code active()} and {@code validEntityType()}.
 *
 * @return true only if this entity is active and its entity type is valid
 */
public boolean valid() {

    if (!active()) {
        return false;
    }
    return validEntityType();
}
@Override
public String value() {
@ -292,4 +338,42 @@ public class TextEntity implements IEntity {
.orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
}
/**
* Adds the listener to the collection of entity event listeners kept by this entity.
*
* @param listener the listener to register
*/
@Override
public void addEntityEventListener(EntityEventListener listener) {
entityEventListeners.add(listener);
}
/**
* Removes the listener from the collection of entity event listeners kept by this entity.
*
* @param listener the listener to unregister
*/
@Override
public void removeEntityEventListener(EntityEventListener listener) {
entityEventListeners.remove(listener);
}
/**
 * Registers relations between this entity and every other entity of the deepest fully containing node that
 * intersects it. Entities equal to this one and entities of type TEMPORARY are skipped.
 * <p>
 * For each remaining entity one relation is stored in the other entity's relation map (keyed by this entity)
 * and one in this entity's relation map (keyed by the other entity):
 * <ul>
 * <li>identical text ranges: an Equality on both sides</li>
 * <li>the other entity is contained by this one: Intersection on the other side, Containment on this side</li>
 * <li>this entity is contained by the other one: Containment on the other side, Intersection on this side</li>
 * <li>otherwise: an Intersection on both sides</li>
 * </ul>
 * NOTE(review): in the Equality case the relation stored for an entity has the counterpart as its first argument,
 * whereas all other cases put the storing entity first — confirm that this asymmetry is intended.
 * NOTE(review): the deepest fully containing node is dereferenced without a null check — presumably callers only
 * invoke this while the entity is still part of the graph; verify.
 */
public void computeRelations() {

    for (TextEntity other : this.getDeepestFullyContainingNode().getEntities()) {
        if (!this.intersects(other) || this.equals(other) || other.getEntityType().equals(EntityType.TEMPORARY)) {
            continue;
        }

        // pick the pair of relations first, register them afterwards
        final Relation relationForOther;
        final Relation relationForThis;
        if (other.getTextRange().equals(this.getTextRange())) {
            relationForOther = new Equality(this, other);
            relationForThis = new Equality(other, this);
        } else if (other.containedBy(this)) {
            relationForOther = new Intersection(other, this);
            relationForThis = new Containment(this, other);
        } else if (this.containedBy(other)) {
            relationForOther = new Containment(other, this);
            relationForThis = new Intersection(this, other);
        } else {
            relationForOther = new Intersection(other, this);
            relationForThis = new Intersection(this, other);
        }

        other.getRelations().computeIfAbsent(this, key -> new HashSet<>()).add(relationForOther);
        this.getRelations().computeIfAbsent(other, key -> new HashSet<>()).add(relationForThis);
    }
}
}

View File

@ -9,7 +9,6 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -10,6 +10,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
@ -38,7 +39,6 @@ public class Document extends AbstractSemanticNode {
@Builder.Default
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();
@Override
public NodeType getType() {
@ -63,8 +63,8 @@ public class Document extends AbstractSemanticNode {
*
* @return A list of main sections within the document
* @deprecated This method is marked for removal.
* Use {@link #streamChildrenOfType(NodeType)} instead,
* or {@link #getChildrenOfTypeSectionOrSuperSection()} which returns children of type SECTION as well as SUPER_SECTION.
* Use {@link #streamChildrenOfType(NodeType)} instead,
* or {@link #getChildrenOfTypeSectionOrSuperSection()} which returns children of type SECTION as well as SUPER_SECTION.
*/
@Deprecated(forRemoval = true)
public List<Section> getMainSections() {
@ -168,4 +168,11 @@ public class Document extends AbstractSemanticNode {
return bBox;
}
/**
* Hands this Document to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,19 +1,10 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
@ -67,4 +58,11 @@ public class Footer extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.FOOTER + ": " + leafTextBlock.buildSummary();
}
/**
* Hands this Footer to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,19 +1,10 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
@ -70,4 +61,11 @@ public class Header extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.HEADER + ": " + leafTextBlock.buildSummary();
}
/**
* Hands this Header to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,20 +1,11 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
@ -71,6 +62,11 @@ public class Headline extends AbstractSemanticNode {
}
/**
* Extracts the SectionIdentifier from the text of this headline.
*
* @return The SectionIdentifier, with which the headline starts.
*/
@Override
public SectionIdentifier getSectionIdentifier() {
@ -104,4 +100,11 @@ public class Headline extends AbstractSemanticNode {
.isPresent();
}
/**
* Hands this Headline to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,24 +1,22 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -30,8 +28,7 @@ import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
*
Represents an image within the document.
* Represents an image within the document.
*/
@Data
@SuperBuilder
@ -43,6 +40,7 @@ public class Image extends AbstractSemanticNode implements IEntity {
String id;
String representationHash;
TextBlock leafTextBlock;
ImageType imageType;
@ -57,6 +55,9 @@ public class Image extends AbstractSemanticNode implements IEntity {
Page page;
@Builder.Default
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
@Override
public NodeType getType() {
@ -79,6 +80,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
}
/**
 * Decides whether this image is treated as a full-page image.
 *
 * @return true if the image type is OCR, or if the area of the image is at least half the area of its page
 */
public boolean isFullPageImage() {

    if (imageType.equals(ImageType.OCR)) {
        return true;
    }
    double imageArea = getArea();
    return imageArea >= 0.5 * page.getArea();
}
/**
* @return the area of this image, computed as width times height of its position
*/
private double getArea() {
return position.getWidth() * position.getHeight();
}
@Override
public TextRange getTextRange() {
@ -93,17 +106,33 @@ public class Image extends AbstractSemanticNode implements IEntity {
}
/**
* Adds the listener to the collection of entity event listeners kept by this image.
*
* @param listener the listener to register
*/
@Override
public void addEntityEventListener(EntityEventListener listener) {
entityEventListeners.add(listener);
}
/**
* Removes the listener from the collection of entity event listeners kept by this image.
*
* @param listener the listener to unregister
*/
@Override
public void removeEntityEventListener(EntityEventListener listener) {
entityEventListeners.remove(listener);
}
@Override
public String type() {
return getManualOverwrite().getType().orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
return getManualOverwrite().getType()
.orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
}
@Override
public String toString() {
return getTreeId() + ": " + getValue() + " " + position;
return getTreeId() + ": " + getValue() + " [%.2f,%.2f,%.2f,%.2f]".formatted(position.getX(), position.getY(), position.getWidth(), position.getHeight());
}
@ -154,4 +183,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
return (area / calculatedIntersection) > containmentThreshold;
}
/**
* Hands this Image to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
/**
* @return always true; an Image reports itself as a leaf node
*/
@Override
public boolean isLeaf() {
return true;
}
}

View File

@ -0,0 +1,7 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
/**
* Enumeration of layout engines with the constants ALGORITHM, AI and OUTLINE.
* NOTE(review): presumably a local replacement for the layout-parser LayoutEngine type whose imports are removed from the node classes — confirm.
*/
public enum LayoutEngine {
ALGORITHM,
AI,
OUTLINE
}

View File

@ -0,0 +1,127 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.FieldDefaults;
/**
 * Represents a single page in a document.
 * Equality and hash code are based on the page number only.
 */
@Getter
@Setter
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Page {

    @EqualsAndHashCode.Include
    Integer number;
    Integer height;
    Integer width;
    Integer rotation;

    List<AtomicTextBlock> textBlocksOnPage;
    Header header;
    Footer footer;

    @Builder.Default
    Set<TextEntity> entities = new HashSet<>();

    @Builder.Default
    Set<Image> images = new HashSet<>();


    /**
     * Constructs and returns a {@link TextBlock} representing the concatenated text of all non-empty text blocks on this page.
     *
     * @return The main body text block.
     */
    public TextBlock getMainBodyTextBlock() {

        return textBlocksOnPage.stream()
                .filter(atb -> !atb.isEmpty())
                .collect(new TextBlockCollector());
    }


    /**
     * Retrieves the highest SemanticNodes which appear only on this page. It is achieved by traversing the DocumentTree up, until a SemanticNode's direct parent is no longer exclusively on this page.
     *
     * @return A list which contains the highest SemanticNodes which appear only on this page.
     */
    public List<SemanticNode> getMainBody() {

        return textBlocksOnPage.stream()
                .map(AtomicTextBlock::getParent)
                .map(this::getHighestParentOnlyOnPage)
                .distinct()
                .toList();
    }


    /**
     * Retrieves the highest SemanticNodes which are present on the page. There might be multiples, as two or more Main Sections start on a page.
     * This is achieved by traversing up the document tree and returning all SemanticNodes whose direct parent is the Document.
     *
     * @return A stream of the distinct highest SemanticNodes present on this page
     */
    public Stream<SemanticNode> streamHighestSemanticNodesOnPage() {

        return textBlocksOnPage.stream()
                .map(AtomicTextBlock::getParent)
                .map(this::getHighestSemanticNodeOnPage)
                .distinct();
    }


    /**
     * Walks up from the given node as long as the parent is located exclusively on this page.
     */
    private SemanticNode getHighestParentOnlyOnPage(SemanticNode node) {

        SemanticNode currentNode = node;
        while (currentNode.hasParent() && currentNode.getParent().onlyOnPage(this)) {
            currentNode = currentNode.getParent();
        }
        return currentNode;
    }


    /**
     * Walks up from the given node until its parent is of type DOCUMENT or it has no parent.
     */
    private SemanticNode getHighestSemanticNodeOnPage(SemanticNode node) {

        SemanticNode currentNode = node;
        while (currentNode.hasParent() //
                && !currentNode.getParent().getType().equals(NodeType.DOCUMENT)) {
            currentNode = currentNode.getParent();
        }
        return currentNode;
    }


    @Override
    public String toString() {

        return "Page: " + number;
    }


    /**
     * Computes the area of this page as width times height.
     * The product is calculated in double arithmetic, so it cannot overflow the int range before being widened.
     *
     * @return the page area
     * @throws NullPointerException if width or height is not set
     */
    public double getArea() {

        return width.doubleValue() * height.doubleValue();
    }

}

View File

@ -1,19 +1,10 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
@ -59,4 +50,11 @@ public class Paragraph extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.PARAGRAPH + ": " + leafTextBlock.buildSummary();
}
/**
* Hands this Paragraph to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -1,23 +1,11 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
@ -33,7 +21,6 @@ import lombok.extern.slf4j.Slf4j;
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
public class Section extends AbstractSemanticNode {
@Override
public NodeType getType() {
@ -53,6 +40,11 @@ public class Section extends AbstractSemanticNode {
}
/**
* Returns the SectionIdentifier from the headline obtained by the getHeadline() method.
*
* @return the SectionIdentifier of the associated Headline
*/
@Override
public SectionIdentifier getSectionIdentifier() {
@ -60,7 +52,6 @@ public class Section extends AbstractSemanticNode {
}
@Override
public String toString() {
@ -101,4 +92,10 @@ public class Section extends AbstractSemanticNode {
}
/**
* Hands this Section to the given visitor by calling {@code visitor.visit(this)}.
*
* @param visitor the visitor to invoke
*/
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -3,29 +3,36 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
/**
* Represents a unique identifier for a section within a document.
* Represents the textual identifier sometimes present in a Headline. For example, given the headline 3.1 Results, the section identifier is 3.1.
* Keep in mind that this identifier need not be unique within a single document, as multiple headlines may start with the same textual identifier.
*/
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class SectionIdentifier {
static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d{1,2})(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?");
public static Pattern alphanumericIdentifierPattern = Pattern.compile("^[\\s]?[A-Za-z][\\s.,;]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?");
private enum Format {
protected enum Format {
EMPTY,
NUMERICAL,
ALPHANUMERIC,
DOCUMENT
}
@Getter
Format format;
@Getter
String identifierString;
List<Integer> identifiers;
boolean asChild;
@ -47,6 +54,10 @@ public class SectionIdentifier {
if (numericalIdentifierMatcher.find()) {
return buildNumericalSectionIdentifier(headline, numericalIdentifierMatcher);
}
Matcher alphanumericIdentifierMatcher = alphanumericIdentifierPattern.matcher(headline);
if (alphanumericIdentifierMatcher.find()) {
return buildAlphanumericSectionIdentifier(headline, alphanumericIdentifierMatcher);
}
// more formats here
return SectionIdentifier.empty();
}
@ -105,8 +116,34 @@ public class SectionIdentifier {
}
/**
 * Builds a SectionIdentifier of format ALPHANUMERIC from a headline on which the alphanumeric identifier pattern matched.
 * <p>
 * The leading letter is mapped to its position in the alphabet (A or a = 1, B or b = 2, ...) and becomes the first
 * identifier. It is followed by up to three of the captured numbers; reading stops at the first group that is
 * missing, blank or equal to "0".
 *
 * @param headline the headline text the matcher was created for
 * @param alphanumericIdentifierMatcher matcher on which {@code find()} has already succeeded
 * @return the alphanumeric SectionIdentifier; its identifier string is the matched part of the headline
 */
private static SectionIdentifier buildAlphanumericSectionIdentifier(String headline, Matcher alphanumericIdentifierMatcher) {

    String identifierString = headline.substring(alphanumericIdentifierMatcher.start(), alphanumericIdentifierMatcher.end());

    // The pattern allows a whitespace character in front of the letter. Skip it, otherwise the whitespace itself
    // would be mapped (e.g. ' ' - 'A' + 1 = -32) instead of the section letter.
    String sectionLetter = alphanumericIdentifierMatcher.group(0).stripLeading().substring(0, 1).toUpperCase(Locale.ENGLISH);
    int mappedCharacterValue = sectionLetter.charAt(0) - 'A' + 1;

    List<Integer> identifiers = new LinkedList<>();
    identifiers.add(mappedCharacterValue);

    for (int i = 1; i <= 3; i++) {
        String numericalIdentifier = alphanumericIdentifierMatcher.group(i);
        if (numericalIdentifier == null || numericalIdentifier.isBlank() || numericalIdentifier.equals("0")) {
            break;
        }
        identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
    }

    return new SectionIdentifier(Format.ALPHANUMERIC, identifierString, List.copyOf(identifiers), false);
}
/**
* Determines if the current section is the parent of the given section.
* Determines if the current SectionIdentifier is the parent of the given SectionIdentifier.
*
* @param sectionIdentifier The section identifier to compare against.
* @return true if the current section is the parent of the given section, false otherwise.
@ -155,4 +192,30 @@ public class SectionIdentifier {
return identifierString;
}
/**
* Checks whether this identifier has the format EMPTY.
*
* @return true if the format is EMPTY, i.e. no identifier could be found
*/
public boolean isEmpty() {
return this.format.equals(Format.EMPTY);
}
/**
* The level of a SectionIdentifier corresponds to the number of its identifiers, e.g. 1.1 is level 2 and 1. is level 1.
*
* @return the number of identifiers of this SectionIdentifier
*/
public int level() {
return identifiers.size();
}
/**
* @return the list of identifiers held by this SectionIdentifier; the list itself is returned, not a copy
*/
protected List<Integer> getIdentifiers() {
return identifiers;
}
}

View File

@ -15,15 +15,16 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.service.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
public interface SemanticNode {
@ -50,6 +51,23 @@ public interface SemanticNode {
}
/**
 * Collects the AtomicTextBlocks of all leaf nodes returned by {@code streamAllSubNodes()} that lie on one of the
 * given pages and combines them with a {@link ConsecutiveTextBlockCollector}.
 *
 * @param pageNumbers the numbers of the pages whose text blocks are requested
 * @return the TextBlocks the collector builds from the AtomicTextBlocks found on the given pages
 */
default List<TextBlock> getTextBlocksByPageNumbers(Set<Integer> pageNumbers) {

    return streamAllSubNodes().filter(SemanticNode::isLeaf)
            .flatMap(leaf -> leaf.getTextBlock().getAtomicTextBlocks().stream())
            .filter(atomicTextBlock -> pageNumbers.contains(atomicTextBlock.getPage().getNumber()))
            .collect(new ConsecutiveTextBlockCollector());
}
/**
* Any Node maintains its own Set of Entities.
* This Set contains all Entities whose TextRange intersects the TextRange of this node.
@ -59,6 +77,20 @@ public interface SemanticNode {
Set<TextEntity> getEntities();
/**
 * Streams the entities of this SemanticNode which are active and whose entity type is valid
 * (see {@code TextEntity#validEntityType()}).
 * Existence checks such as hasEntitiesOfType() are built on this stream.
 *
 * @return a Stream over the valid TextEntities of this node
 */
default Stream<TextEntity> streamValidEntities() {

    Set<TextEntity> allEntities = getEntities();
    return allEntities.stream()
            .filter(entity -> entity.active() && entity.validEntityType());
}
/**
* Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
*
@ -149,7 +181,7 @@ public interface SemanticNode {
/**
* Returns a SectionIdentifier, such that it acts as a child of the first Headline associated with this SemanticNode.
* Returns the SectionIdentifier as a child of the SectionIdentifier returned by the getHeadline() method.
*
* @return The SectionIdentifier from the first Headline.
*/
@ -259,9 +291,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfType(String type) {
return getEntities().stream()
.filter(TextEntity::active)
.anyMatch(redactionEntity -> redactionEntity.type().equals(type));
return streamValidEntities().anyMatch(redactionEntity -> redactionEntity.type().equals(type));
}
@ -274,10 +304,8 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAnyType(String... types) {
return getEntities().stream()
.filter(TextEntity::active)
.anyMatch(redactionEntity -> Arrays.stream(types)
.anyMatch(type -> redactionEntity.type().equals(type)));
return streamValidEntities().anyMatch(redactionEntity -> Arrays.stream(types)
.anyMatch(type -> redactionEntity.type().equals(type)));
}
@ -290,9 +318,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAllTypes(String... types) {
return getEntities().stream()
.filter(TextEntity::active)
.map(TextEntity::type)
return streamValidEntities().map(TextEntity::type)
.collect(Collectors.toUnmodifiableSet())
.containsAll(Arrays.stream(types)
.toList());
@ -301,31 +327,28 @@ public interface SemanticNode {
/**
* Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
* Ignores Entity with ignored == true or removed == true.
* Ignores Entity which are not active or of a removal type ignored == true or removed == true.
*
* @param type string representing the type of entities to return
* @return List of RedactionEntities of any the type
*/
default List<TextEntity> getEntitiesOfType(String type) {
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.type().equals(type))
return streamValidEntities().filter(redactionEntity -> redactionEntity.type().equals(type))
.toList();
}
/**
* Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
* Ignores Entity with ignored == true or removed == true.
* Ignores Entity that are not valid.
*
* @param types A list of strings representing the types of entities to return
* @return List of RedactionEntities of any provided type
*/
default List<TextEntity> getEntitiesOfType(List<String> types) {
return getEntities().stream()
.filter(TextEntity::active)
return streamValidEntities()//
.filter(redactionEntity -> redactionEntity.isAnyType(types))
.toList();
}
@ -333,15 +356,14 @@ public interface SemanticNode {
/**
* Returns a List of Entities in this SemanticNode which have any of the provided types.
* Ignores Entity with the ignored flag set to true or the removed flag set to true.
* Ignores Entity that are not valid.
*
* @param types A list of strings representing the types of entities to return
* @return List of RedactionEntities that match any of the provided types
*/
default List<TextEntity> getEntitiesOfType(String... types) {
return getEntities().stream()
.filter(TextEntity::active)
return streamValidEntities()//
.filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types)
.toList()))
.toList();
@ -445,7 +467,7 @@ public interface SemanticNode {
*/
default boolean containsStringIgnoreCase(String string) {
return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT));
return getTextBlock().getSearchTextLowerCase().contains(string.toLowerCase(Locale.ROOT));
}
@ -756,13 +778,12 @@ public interface SemanticNode {
/**
* TODO: this produces unwanted results for sections spanning multiple columns.
* Computes the Union of the bounding boxes of all children recursively.
*
* @return The union of the BoundingBoxes of all children
*/
private Map<Page, Rectangle2D> getBBoxFromChildren() {
//TODO: this produces unwanted results for sections spanning multiple columns.
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox)
.toList();
@ -797,15 +818,24 @@ public interface SemanticNode {
/**
* Accepts a {@link NodeVisitor} and initiates a depth-first traversal of the semantic tree rooted at this node.
* The visitor's {@link NodeVisitor#visit(SemanticNode)} method is invoked for each node encountered during the traversal.
* The visitor's {@link NodeVisitor#visit} method is invoked for each node encountered during the traversal.
*
* @param visitor The {@link NodeVisitor} to accept and apply during the traversal.
* @see NodeVisitor
*/
default void accept(NodeVisitor visitor) {
void accept(NodeVisitor visitor);
visitor.visit(this);
streamChildren().forEach(childNode -> childNode.accept(visitor));
/**
 * Checks whether this SemanticNode appears on exactly one page, and whether that page is the provided one.
 *
 * @param page the page to check
 * @return true if getPages() holds exactly one page and contains the provided page; false otherwise.
 */
default boolean onlyOnPage(Page page) {

    Set<Page> occupiedPages = getPages();
    if (occupiedPages.size() != 1) {
        // Spans several pages (or none), so it cannot be "only" on the given page.
        return false;
    }
    return occupiedPages.contains(page);
}
}

View File

@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
@ -39,6 +40,11 @@ public class SuperSection extends AbstractSemanticNode {
}
/**
* Returns the SectionIdentifier from the headline obtained by the getHeadline() method.
*
* @return the SectionIdentifier of the associated Headline
*/
@Override
public SectionIdentifier getSectionIdentifier() {
@ -46,7 +52,6 @@ public class SuperSection extends AbstractSemanticNode {
}
@Override
public String toString() {
@ -87,4 +92,10 @@ public class SuperSection extends AbstractSemanticNode {
}
/**
 * Hands this SuperSection to the given visitor by calling its visit method.
 * This method itself does not descend into child nodes.
 *
 * @param visitor the visitor to apply to this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -14,10 +14,10 @@ import java.util.stream.IntStream;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -74,8 +74,7 @@ public class Table implements SemanticNode {
return IntStream.range(0, numberOfRows).boxed()
.filter(row -> rowContainsStringsIgnoreCase(row, strings))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.flatMap(Collection::stream);
.flatMap(TableCell::streamValidEntities);
}
@ -135,11 +134,11 @@ public class Table implements SemanticNode {
/**
* Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
* Streams all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types.
* Ignores Entity with ignored == true or removed == true.
*
* @param types type strings to check whether a row contains an entity like them
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
* @return Stream of all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types.
*/
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
@ -192,30 +191,26 @@ public class Table implements SemanticNode {
/**
* Streams all Entities in the given row.
* Streams all valid Entities in the given row.
*
* @param rowNumber the row number to look for
* @return stream of TextEntities occurring in row
*/
public Stream<TextEntity> streamTextEntitiesInRow(int rowNumber) {
return streamRow(rowNumber).map(TableCell::getEntities)
.flatMap(Collection::stream)
.filter(TextEntity::active);
return streamRow(rowNumber).flatMap(TableCell::streamValidEntities);
}
/**
* Streams all Entities in the given col.
* Streams all valid Entities in the given col.
*
* @param colNumber the column number to look for
* @return stream of TextEntities occurring in row
*/
public Stream<TextEntity> streamTextEntitiesInCol(int colNumber) {
return streamCol(colNumber).map(TableCell::getEntities)
.flatMap(Collection::stream)
.filter(TextEntity::active);
return streamCol(colNumber).flatMap(TableCell::streamValidEntities);
}
@ -269,6 +264,7 @@ public class Table implements SemanticNode {
return streamHeaders().filter(tableCellNode -> tableCellNode.getTextBlock().getSearchText().contains(header))
.map(TableCell::getCol)
.distinct()
.flatMap(this::streamCol)
.filter(tableCellNode -> !tableCellNode.isHeader());
}
@ -423,4 +419,11 @@ public class Table implements SemanticNode {
return treeId.toString() + ": " + NodeType.TABLE + ": #cols: " + numberOfCols + ", #rows: " + numberOfRows + ", " + this.getTextBlock().buildSummary();
}
/**
 * Hands this Table to the given visitor by calling its visit method.
 * This method itself does not descend into child nodes.
 *
 * @param visitor the visitor to apply to this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -2,20 +2,14 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
@ -42,7 +36,6 @@ public class TableCell extends AbstractSemanticNode {
TextBlock textBlock;
@Override
public Map<Page, Rectangle2D> getBBox() {
@ -88,4 +81,11 @@ public class TableCell extends AbstractSemanticNode {
return getTreeId() + ": " + NodeType.TABLE_CELL + ": " + this.getTextBlock().buildSummary();
}
/**
 * Hands this TableCell to the given visitor by calling its visit method.
 * This method itself does not descend into child nodes.
 *
 * @param visitor the visitor to apply to this node
 */
@Override
public void accept(NodeVisitor visitor) {
visitor.visit(this);
}
}

View File

@ -0,0 +1,47 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Semantic node of type {@link NodeType#TABLE_OF_CONTENTS}.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
public class TableOfContents extends AbstractSemanticNode {

    @Override
    public NodeType getType() {

        return NodeType.TABLE_OF_CONTENTS;
    }


    /**
     * Returns the first direct child of type HEADLINE, falling back to the headline of the parent node
     * when this node has no headline child.
     *
     * @return the Headline associated with this table of contents
     */
    @Override
    public Headline getHeadline() {

        return streamChildrenOfType(NodeType.HEADLINE).map(node -> (Headline) node)
                .findFirst()
                .orElseGet(() -> getParent().getHeadline());
    }


    /**
     * Hands this node to the given visitor by calling its visit method.
     *
     * @param visitor the visitor to apply to this node
     */
    @Override
    public void accept(NodeVisitor visitor) {

        visitor.visit(this);
    }


    @Override
    public String toString() {

        // Label with this node's own type; the previous TABLE_OF_CONTENTS_ITEM label was a copy-paste slip
        // that made the container indistinguishable from its items in logs.
        return getTreeId() + ": " + NodeType.TABLE_OF_CONTENTS + ": " + getTextBlock().buildSummary();
    }

}

View File

@ -0,0 +1,57 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Leaf semantic node of type {@link NodeType#TABLE_OF_CONTENTS_ITEM} that carries its own TextBlock.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
public class TableOfContentsItem extends AbstractSemanticNode {

    /** The text of this item; returned as-is by {@link #getTextBlock()}. */
    TextBlock leafTextBlock;


    @Override
    public NodeType getType() {

        return NodeType.TABLE_OF_CONTENTS_ITEM;
    }


    /**
     * @return always true
     */
    @Override
    public boolean isLeaf() {

        return true;
    }


    /**
     * @return the TextBlock stored directly on this leaf
     */
    @Override
    public TextBlock getTextBlock() {

        return leafTextBlock;
    }


    /**
     * Hands this node to the given visitor by calling its visit method.
     *
     * @param visitor the visitor to apply to this node
     */
    @Override
    public void accept(NodeVisitor visitor) {

        visitor.visit(this);
    }


    @Override
    public String toString() {

        return getTreeId() + ": " + NodeType.TABLE_OF_CONTENTS_ITEM + ": " + leafTextBlock.buildSummary();
    }

}

View File

@ -1,11 +1,12 @@
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import static com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.lang.ref.SoftReference;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@ -15,12 +16,12 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.data.DocumentPositionDataProto.DocumentPositionData.Position;
import com.iqser.red.service.redaction.v1.server.data.DocumentTextDataProto.DocumentTextData;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -28,6 +29,7 @@ import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -36,19 +38,33 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class AtomicTextBlock implements TextBlock {
@NonNull
Long id;
@NonNull
Integer numberOnPage;
@NonNull
Page page;
//string coordinates
@NonNull
TextRange textRange;
@NonNull
String searchText;
List<String> words;
@NonNull
List<Integer> lineBreaks;
@NonNull
List<TextRange> italicTextRanges;
@NonNull
List<TextRange> boldTextRanges;
SoftReference<String> searchTextLowerCaseCache;
SoftReference<List<String>> wordsCache;
//position coordinates
@NonNull
List<Integer> stringIdxToPositionIdx;
@Getter
@NonNull
List<Rectangle2D> positions;
@EqualsAndHashCode.Exclude
@ -74,6 +90,8 @@ public class AtomicTextBlock implements TextBlock {
.stringIdxToPositionIdx(Collections.emptyList())
.positions(Collections.emptyList())
.parent(parent)
.boldTextRanges(Collections.emptyList())
.italicTextRanges(Collections.emptyList())
.build();
}
@ -86,20 +104,26 @@ public class AtomicTextBlock implements TextBlock {
.page(page)
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
.searchText(atomicTextBlockData.getSearchText())
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed()
.toList())
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed()
.toList())
.positions(toRectangle2DList(atomicPositionBlockData.getPositions()))
.lineBreaks(atomicTextBlockData.getLineBreaksList())
.stringIdxToPositionIdx(atomicPositionBlockData.getStringIdxToPositionIdxList())
.positions(toRectangle2DList(atomicPositionBlockData.getPositionsList()))
.italicTextRanges(atomicTextBlockData.getItalicTextRangesList()
.stream()
.map(r -> new TextRange(r.getStart(), r.getEnd()))
.toList())
.boldTextRanges(atomicTextBlockData.getBoldTextRangesList()
.stream()
.map(r -> new TextRange(r.getStart(), r.getEnd()))
.toList())
.parent(parent)
.build();
}
private static List<Rectangle2D> toRectangle2DList(float[][] positions) {
private static List<Rectangle2D> toRectangle2DList(List<Position> positions) {
return Arrays.stream(positions)
.map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3]))
return positions.stream()
.map(pos -> (Rectangle2D) new Rectangle2D.Float(pos.getValue(0), pos.getValue(1), pos.getValue(2), pos.getValue(3)))
.toList();
}
@ -121,8 +145,31 @@ public class AtomicTextBlock implements TextBlock {
}
/**
 * Returns the search text in lower case (Locale.ENGLISH).
 * The result is held through a SoftReference and recomputed when the reference is unset or has been cleared.
 *
 * @return the lower-cased search text
 */
@Override
public String getSearchTextLowerCase() {

    SoftReference<String> cacheRef = searchTextLowerCaseCache;
    String cached = cacheRef == null ? null : cacheRef.get();
    if (cached != null) {
        return cached;
    }
    String lowered = getSearchText().toLowerCase(Locale.ENGLISH);
    searchTextLowerCaseCache = new SoftReference<>(lowered);
    return lowered;
}
public List<String> getWords() {
List<String> words = null;
if (wordsCache != null) {
words = wordsCache.get();
}
if (words == null) {
words = new ArrayList<>();
BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
@ -131,6 +178,7 @@ public class AtomicTextBlock implements TextBlock {
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
words.add(searchText.substring(start, end));
}
wordsCache = new SoftReference<>(words);
}
return words;
}

View File

@ -3,11 +3,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.lang.ref.SoftReference;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Stream;
@ -16,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import lombok.AccessLevel;
import lombok.Data;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -25,6 +28,7 @@ public class ConcatenatedTextBlock implements TextBlock {
List<AtomicTextBlock> atomicTextBlocks;
String searchText;
TextRange textRange;
SoftReference<String> searchTextLowerCaseCache;
public static ConcatenatedTextBlock empty() {
@ -100,6 +104,23 @@ public class ConcatenatedTextBlock implements TextBlock {
}
/**
 * Returns the search text in lower case (Locale.ENGLISH).
 * The result is held through a SoftReference and recomputed when the reference is unset or has been cleared.
 *
 * @return the lower-cased search text
 */
@Override
public String getSearchTextLowerCase() {

    SoftReference<String> cacheRef = searchTextLowerCaseCache;
    String cached = cacheRef == null ? null : cacheRef.get();
    if (cached != null) {
        return cached;
    }
    String lowered = getSearchText().toLowerCase(Locale.ENGLISH);
    searchTextLowerCaseCache = new SoftReference<>(lowered);
    return lowered;
}
@Override
public List<String> getWords() {
@ -142,6 +163,26 @@ public class ConcatenatedTextBlock implements TextBlock {
}
/**
 * Collects the italic text ranges of all contained AtomicTextBlocks, in block order.
 *
 * @return the concatenated italic TextRanges of every AtomicTextBlock
 */
@Override
public List<TextRange> getItalicTextRanges() {

    return getAtomicTextBlocks().stream()
            .map(AtomicTextBlock::getItalicTextRanges)
            .flatMap(List::stream)
            .toList();
}
/**
 * Collects the bold text ranges of all contained AtomicTextBlocks, in block order.
 *
 * @return the concatenated bold TextRanges of every AtomicTextBlock
 */
@Override
public List<TextRange> getBoldTextRanges() {

    return getAtomicTextBlocks().stream()
            .map(AtomicTextBlock::getBoldTextRanges)
            .flatMap(List::stream)
            .toList();
}
@Override
public Rectangle2D getPosition(int stringIdx) {
@ -259,6 +300,7 @@ public class ConcatenatedTextBlock implements TextBlock {
}
@NonNull
@Override
public String toString() {

View File

@ -0,0 +1,72 @@
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;
import lombok.NoArgsConstructor;
/**
 * Collector that groups incoming TextBlocks into ConcatenatedTextBlocks: a TextBlock whose text range starts
 * exactly where the previously collected block ends is appended to that block, otherwise a new block is started.
 * <p>
 * The collector is declared {@link Characteristics#IDENTITY_FINISH}, so the result is the same mutable list
 * the accumulator filled.
 */
@NoArgsConstructor
public class ConsecutiveTextBlockCollector implements Collector<TextBlock, List<ConcatenatedTextBlock>, List<TextBlock>> {

    @Override
    public Supplier<List<ConcatenatedTextBlock>> supplier() {

        return LinkedList::new;
    }


    @Override
    public BiConsumer<List<ConcatenatedTextBlock>, TextBlock> accumulator() {

        return (existingList, textBlock) -> {
            if (!existingList.isEmpty()) {
                ConcatenatedTextBlock prevBlock = existingList.get(existingList.size() - 1);
                if (prevBlock.getTextRange().end() == textBlock.getTextRange().start()) {
                    // Directly adjoins the previous block: extend it instead of starting a new one.
                    prevBlock.concat(textBlock);
                    return;
                }
            }
            ConcatenatedTextBlock ctb = ConcatenatedTextBlock.empty();
            ctb.concat(textBlock);
            existingList.add(ctb);
        };
    }


    @Override
    public BinaryOperator<List<ConcatenatedTextBlock>> combiner() {

        // Append into the left container so the combined result stays a mutable list,
        // exactly like the containers handed out by supplier().
        return (list1, list2) -> {
            list1.addAll(list2);
            return list1;
        };
    }


    /**
     * Identity finisher, matching the declared IDENTITY_FINISH characteristic: the accumulated list is
     * returned unchanged, merely viewed as a list of TextBlocks.
     */
    @Override
    @SuppressWarnings("unchecked")
    public Function<List<ConcatenatedTextBlock>, List<TextBlock>> finisher() {

        // Safe: every element is a ConcatenatedTextBlock, which is a TextBlock.
        return a -> (List<TextBlock>) (List<? extends TextBlock>) a;
    }


    @Override
    public Set<Characteristics> characteristics() {

        return Set.of(Characteristics.IDENTITY_FINISH);
    }

}

View File

@ -19,6 +19,9 @@ public interface TextBlock extends CharSequence {
String getSearchText();
String getSearchTextLowerCase();
List<String> getWords();
@ -52,6 +55,12 @@ public interface TextBlock extends CharSequence {
String subSequenceWithLineBreaks(TextRange textRange);
List<TextRange> getItalicTextRanges();
List<TextRange> getBoldTextRanges();
int numberOfLines();

View File

@ -1,23 +1,27 @@
package com.iqser.red.service.redaction.v1.server.service.document;
package com.iqser.red.service.redaction.v1.server.utils;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.experimental.UtilityClass;
@UtilityClass
public class EntityCreationUtility {
public static void checkIfBothStartAndEndAreEmpty(String start, String end) {
public void checkIfBothStartAndEndAreEmpty(String start, String end) {
checkIfBothStartAndEndAreEmpty(List.of(start), List.of(end));
}
public static <T> void checkIfBothStartAndEndAreEmpty(List<T> start, List<T> end) {
public <T> void checkIfBothStartAndEndAreEmpty(List<T> start, List<T> end) {
if ((start == null || start.isEmpty()) && (end == null || end.isEmpty())) {
throw new IllegalArgumentException("Start and end values are empty!");
@ -25,7 +29,7 @@ public class EntityCreationUtility {
}
public static int truncateEndIfLineBreakIsBetween(int end, int expandedEnd, TextBlock textBlock) {
public int truncateEndIfLineBreakIsBetween(int end, int expandedEnd, TextBlock textBlock) {
if (textBlock.getNextLinebreak(end) < expandedEnd) {
return end;
@ -34,7 +38,7 @@ public class EntityCreationUtility {
}
public static Set<SemanticNode> findIntersectingSubNodes(SemanticNode initialIntersectingNode, TextRange textRange) {
public Set<SemanticNode> findIntersectingSubNodes(SemanticNode initialIntersectingNode, TextRange textRange) {
IntersectingNodeVisitor visitor = new IntersectingNodeVisitor(textRange);
@ -46,7 +50,7 @@ public class EntityCreationUtility {
}
public static void addToPages(TextEntity entity) {
public void addToPages(TextEntity entity) {
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
entity.getPages().addAll(pages);
@ -54,14 +58,14 @@ public class EntityCreationUtility {
}
public static void addEntityToNodeEntitySets(TextEntity entity) {
public void addEntityToNodeEntitySets(TextEntity entity) {
entity.getIntersectingNodes()
.forEach(node -> node.getEntities().add(entity));
}
public static boolean allEntitiesIntersectAndHaveSameTypes(List<TextEntity> entitiesToMerge) {
public boolean allEntitiesIntersectAndHaveSameTypes(List<TextEntity> entitiesToMerge) {
if (entitiesToMerge.isEmpty()) {
return true;
@ -79,7 +83,7 @@ public class EntityCreationUtility {
}
public static TextRange toLineAfterTextRange(TextBlock textBlock, TextRange textRange) {
public TextRange toLineAfterTextRange(TextBlock textBlock, TextRange textRange) {
if (textBlock.getTextRange().end() == textRange.end()) {
return new TextRange(textRange.end(), textRange.end());

View File

@ -1,39 +1,36 @@
package com.iqser.red.service.redaction.v1.server.service.document;
package com.iqser.red.service.redaction.v1.server.utils;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.AccessLevel;
import lombok.experimental.FieldDefaults;
import lombok.experimental.UtilityClass;
@Service
@RequiredArgsConstructor
@UtilityClass
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class EntityEnrichmentService {
private final RedactionServiceSettings redactionServiceSettings;
int SURROUNDING_WORDS_OFFSET_WINDOW = 100;
int NUMBER_OF_SURROUNDING_WORDS = 3;
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
}
private String findTextAfter(int index, TextBlock textBlock) {
int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().end());
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
String textAfter = textBlock.subSequence(index, endOffset).toString();
if (!textAfter.isBlank()) {
List<String> wordsAfter = splitToWordsAndRemoveEmptyWords(textAfter);
int numberOfWordsAfter = Math.min(wordsAfter.size(), redactionServiceSettings.getNumberOfSurroundingWords());
int numberOfWordsAfter = Math.min(wordsAfter.size(), NUMBER_OF_SURROUNDING_WORDS);
if (!wordsAfter.isEmpty()) {
return concatWordsAfter(wordsAfter.subList(0, numberOfWordsAfter), textAfter.startsWith(" "));
}
@ -41,14 +38,12 @@ public class EntityEnrichmentService {
return "";
}
private String findTextBefore(int index, TextBlock textBlock) {
int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().start());
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
if (!textBefore.isBlank()) {
List<String> wordsBefore = splitToWordsAndRemoveEmptyWords(textBefore);
int numberOfWordsBefore = Math.min(wordsBefore.size(), redactionServiceSettings.getNumberOfSurroundingWords());
int numberOfWordsBefore = Math.min(wordsBefore.size(), NUMBER_OF_SURROUNDING_WORDS);
if (!wordsBefore.isEmpty()) {
return concatWordsBefore(wordsBefore.subList(wordsBefore.size() - numberOfWordsBefore, wordsBefore.size()), textBefore.endsWith(" "));
}
@ -56,36 +51,26 @@ public class EntityEnrichmentService {
return "";
}
private static List<String> splitToWordsAndRemoveEmptyWords(String textAfter) {
return Arrays.stream(textAfter.split(" "))
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
return Arrays.stream(text.split(" "))
.filter(word -> !Objects.equals("", word))
.toList();
}
private static String concatWordsBefore(List<String> words, boolean endWithSpace) {
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
StringBuilder sb = new StringBuilder();
for (String word : words) {
sb.append(word).append(" ");
}
String result = sb.toString().trim();
return endWithSpace ? result + " " : result;
}
private static String concatWordsAfter(List<String> words, boolean startWithSpace) {
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
StringBuilder sb = new StringBuilder();
for (String word : words) {
sb.append(word).append(" ");
}
String result = sb.toString().trim();
return startWithSpace ? " " + result : result;
}

View File

@ -3,12 +3,17 @@ package com.iqser.red.service.redaction.v1.server.utils;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -18,6 +23,9 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class RedactionSearchUtility {
private static final Logger log = LoggerFactory.getLogger(RedactionSearchUtility.class);
/**
* Checks if any part of a CharSequence matches a given regex pattern.
*
@ -154,14 +162,8 @@ public class RedactionSearchUtility {
*/
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
.map(textBlock::getLineTextRange)
.filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
.toList();
if (lineBoundaries.isEmpty()) {
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
}
return TextRange.merge(lineBoundaries);
Predicate<TextRange> isWithinYRange = lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary);
return filterLineBoundaries(textBlock, isWithinYRange);
}
@ -172,6 +174,49 @@ public class RedactionSearchUtility {
}
/**
 * Finds the lines of a text block whose bounding box lies close to the given vertical range.
 * A line matches when both its minimum Y differs from {@code minY} and its maximum Y differs from {@code maxY}
 * by at most the average height of all position rectangles in the text block.
 *
 * @param maxY      The maximum Y-coordinate to compare each line against.
 * @param minY      The minimum Y-coordinate to compare each line against.
 * @param textBlock The text block containing the lines to be checked.
 * @return A {@link TextRange} merged from all matching lines.
 */
public static TextRange findTextRangesOfAllLinesWithCloseYCoordinates(Double maxY, Double minY, TextBlock textBlock) {

    // Average height over every position rectangle of every line; 0 when there are no positions at all.
    double averageLineHeight = IntStream.range(0, textBlock.numberOfLines())
            .mapToObj(textBlock::getLineTextRange)
            .flatMap((TextRange lineTextRange) -> textBlock.getPositions(lineTextRange)
                    .stream())
            .mapToDouble(RectangularShape::getHeight)
            .average()
            .orElse(0);

    return filterLineBoundaries(textBlock, lineTextRange -> areYCoordinatesClose(maxY, minY, textBlock, lineTextRange, averageLineHeight));
}
/**
 * Checks whether the bounding box of the given line is within {@code averageLineHeight} of both {@code minY} and {@code maxY}.
 */
private static boolean areYCoordinatesClose(Double maxY, Double minY, TextBlock textBlock, TextRange lineTextRange, double averageLineHeight) {

    Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
    double minYDelta = Math.abs(lineBBox.getMinY() - minY);
    // Negated "<=" (not ">") so a NaN delta is rejected here, and maxY is only read once the minY check passed.
    if (!(minYDelta <= averageLineHeight)) {
        return false;
    }
    double maxYDelta = Math.abs(maxY - lineBBox.getMaxY());
    return maxYDelta <= averageLineHeight;
}
/**
 * Merges the text ranges of all lines of the text block that satisfy the predicate.
 * When no line matches, an empty range positioned at the start of the text block is returned.
 */
private static TextRange filterLineBoundaries(TextBlock textBlock, Predicate<TextRange> textRangePredicate) {

    List<TextRange> matchingLines = IntStream.range(0, textBlock.numberOfLines())
            .mapToObj(textBlock::getLineTextRange)
            .filter(textRangePredicate)
            .toList();
    if (!matchingLines.isEmpty()) {
        return TextRange.merge(matchingLines);
    }
    int blockStart = textBlock.getTextRange().start();
    return new TextRange(blockStart, blockStart);
}
/**
* Finds TextRanges matching a regex pattern within a TextBlock.
*
@ -264,8 +309,12 @@ public class RedactionSearchUtility {
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getTextRange()));
List<TextRange> boundaries = new LinkedList<>();
while (matcher.find()) {
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
try {
while (matcher.find()) {
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
}
} catch (StackOverflowError stackOverflowError) {
log.warn("Stackoverflow error for pattern {} in text: {}", pattern.pattern(), textBlock);
}
return boundaries;
}
@ -276,8 +325,12 @@ public class RedactionSearchUtility {
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
List<TextRange> boundaries = new LinkedList<>();
while (matcher.find()) {
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
try {
while (matcher.find()) {
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
}
} catch (StackOverflowError stackOverflowError) {
log.warn("Stackoverflow error for pattern {} in text with linebreaks: {}", pattern.pattern(), searchTextWithLineBreaks);
}
return boundaries;
}

View File

@ -0,0 +1,25 @@
// Schema for the page metadata of a document.
// Java classes are generated into the outer class DocumentPageProto.
syntax = "proto3";
option java_outer_classname = "DocumentPageProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Wrapper message holding the list of DocumentPage entries.
message AllDocumentPages {
repeated DocumentPage documentPages = 1;
}
// Metadata of a single page: its number, dimensions and rotation.
// NOTE(review): height and width are integers; confirm that fractional page sizes never need to be preserved.
message DocumentPage {
// The page number, starting with 1.
int32 number = 1;
// The page height in PDF user units.
int32 height = 2;
// The page width in PDF user units.
int32 width = 3;
// The page rotation as specified by the PDF.
int32 rotation = 4;
}

View File

@ -0,0 +1,28 @@
// Schema for the glyph position data of text blocks.
// Java classes are generated into the outer class DocumentPositionDataProto.
syntax = "proto3";
option java_outer_classname = "DocumentPositionDataProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Wrapper message holding the list of DocumentPositionData entries.
message AllDocumentPositionData {
repeated DocumentPositionData documentPositionData = 1;
}
// Position data of one text block: a string-index to position-index mapping plus one bounding box per glyph.
message DocumentPositionData {
// Identifier of the text block.
int64 id = 1;
// For each string coordinate in the search text of the text block, the array contains an entry relating the string coordinate to the position coordinate.
// This is required due to the text and position coordinates not being equal.
repeated int32 stringIdxToPositionIdx = 2;
// The bounding box for each glyph as a rectangle. This matrix is of size (n,4), where n is the number of glyphs in the text block.
// The second dimension specifies the rectangle with the value x, y, width, height, with x, y specifying the lower left corner.
// In order to access this information, the stringIdxToPositionIdx array must be used to transform the coordinates.
repeated Position positions = 3;
// Definition of a BoundingBox that contains x, y, width, and height.
// The schema itself does not enforce exactly four entries in `value`; that is a convention of the producer.
message Position {
repeated float value = 1;
}
}

View File

@ -0,0 +1,12 @@
// Schema for the structure tree of a document.
// Java classes are generated into the outer class DocumentStructureProto.
syntax = "proto3";
option java_outer_classname = "DocumentStructureProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
import "EntryData.proto";
// Holds the tree of EntryData nodes through its single root entry.
message DocumentStructure {
// The root EntryData represents the Document.
EntryData root = 1;
}

View File

@ -0,0 +1,40 @@
// Schema for the text data of text blocks.
// Java classes are generated into the outer class DocumentTextDataProto.
syntax = "proto3";
import "Range.proto";
option java_outer_classname = "DocumentTextDataProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Wrapper message holding the list of DocumentTextData entries.
message AllDocumentTextData {
repeated DocumentTextData documentTextData = 1;
}
// Text of one text block together with its page, its string offsets, line breaks and font-style ranges.
message DocumentTextData {
// Identifier of the text block.
int64 id = 1;
// The page the text block occurs on.
int64 page = 2;
// The text of the text block.
string searchText = 3;
// Each text block is assigned a number on a page, starting from 0.
int32 numberOnPage = 4;
// The text blocks are ordered, this number represents the start of the text block as a string offset.
int32 start = 5;
// The text blocks are ordered, this number represents the end of the text block as a string offset.
int32 end = 6;
// The line breaks in the text of this semantic node in string offsets. They are exclusive end. At the end of each semantic node there is an implicit linebreak.
repeated int32 lineBreaks = 7;
// The text ranges where the text is italic.
repeated Range italicTextRanges = 8;
// The text ranges where the text is bold.
repeated Range boldTextRanges = 9;
}

View File

@ -0,0 +1,30 @@
// Schema for a single node of the document structure tree.
// Java classes are generated into the outer class EntryDataProto.
syntax = "proto3";
import "LayoutEngine.proto";
import "NodeType.proto";
option java_outer_classname = "EntryDataProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// One semantic node; the message is recursive through `children`, so a single EntryData can carry a whole subtree.
message EntryData {
// Type of the semantic node.
NodeType type = 1;
// Specifies the position in the parsed tree structure.
repeated int32 treeId = 2;
// Specifies the text block IDs associated with this semantic node.
repeated int64 atomicBlockIds = 3;
// Specifies the pages this semantic node appears on.
repeated int64 pageNumbers = 4;
// Some semantic nodes have additional information, this information is stored in this Map.
map<string, string> properties = 5;
// All child Entries of this Entry.
repeated EntryData children = 6;
// Describes the origin of the semantic node.
repeated LayoutEngine engines = 7;
}

View File

@ -0,0 +1,10 @@
// Schema for the LayoutEngine enum referenced by EntryData.engines.
// Java classes are generated into the outer class LayoutEngineProto.
syntax = "proto3";
option java_outer_classname = "LayoutEngineProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Possible origins of a semantic node.
// The value names must stay identical to the constants of the Java enum LayoutEngine in
// model.document.nodes; LayoutEngineMappingTest checks this by name in both directions.
enum LayoutEngine {
ALGORITHM = 0;
AI = 1;
OUTLINE = 2;
}

View File

@ -0,0 +1,19 @@
// Schema for the NodeType enum referenced by EntryData.type.
// Java classes are generated into the outer class NodeTypeProto.
syntax = "proto3";
option java_outer_classname = "NodeTypeProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// Types of semantic nodes in the document structure tree.
// The value names must stay identical to the constants of the Java enum NodeType in
// model.document.nodes; NodeTypeMappingTest checks this by name in both directions.
enum NodeType {
DOCUMENT = 0;
SECTION = 1;
SUPER_SECTION = 2;
HEADLINE = 3;
PARAGRAPH = 4;
TABLE = 5;
TABLE_CELL = 6;
IMAGE = 7;
HEADER = 8;
FOOTER = 9;
TABLE_OF_CONTENTS = 10;
TABLE_OF_CONTENTS_ITEM = 11;
}

View File

@ -0,0 +1,14 @@
// Schema for an index range, used by DocumentTextData for its italic and bold text ranges.
// Java classes are generated into the outer class RangeProto.
syntax = "proto3";
option java_outer_classname = "RangeProto";
option java_package = "com.iqser.red.service.redaction.v1.server.data";
// A pair of start and end indices.
// NOTE(review): the schema does not state whether `end` is inclusive or exclusive; confirm against the producer.
message Range {
// A start index.
int32 start = 1;
// An end index.
int32 end = 2;
}

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Minimum required protoc version
MIN_VERSION="28.3"
# Get the installed protoc version
INSTALLED_VERSION=$(protoc --version | awk '{print $2}')
# Function to compare versions.
# Succeeds (exit status 0) only if version $1 is strictly lower than version $2,
# using version-aware ordering (sort -V). Equal versions are not "lower".
# The previous check was inverted: it succeeded when $1 was the GREATER version,
# which rejected every protoc newer than MIN_VERSION and accepted older ones.
version_lt() {
    [ "$1" != "$2" ] && [ "$(printf '%s\n' "$1" "$2" | sort -V | head -n1)" = "$1" ]
}
# Check if protoc is installed and meets the minimum version
if ! command -v protoc &> /dev/null; then
echo "Error: protoc is not installed. Please install version $MIN_VERSION or later."
exit 1
fi
if version_lt "$INSTALLED_VERSION" "$MIN_VERSION"; then
echo "Error: protoc version $INSTALLED_VERSION is too old. Please upgrade to version $MIN_VERSION or later."
exit 1
fi
# Generate Java files from proto files
protoc --java_out=../java ./*.proto

View File

@ -0,0 +1,33 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.data.LayoutEngineProto;
/**
 * Verifies that the {@link LayoutEngine} enum and the generated protobuf enum
 * {@link LayoutEngineProto.LayoutEngine} declare the same constant names, so that values can be
 * converted between the two by name.
 */
public class LayoutEngineMappingTest {

    /** Every {@link LayoutEngine} constant must be resolvable by name in the protobuf enum. */
    @Test
    public void assertAllValuesMatch() {

        for (LayoutEngine modelEngine : LayoutEngine.values()) {
            LayoutEngineProto.LayoutEngine protoEngine = LayoutEngineProto.LayoutEngine.valueOf(modelEngine.name());
            assertEquals(protoEngine.name(), modelEngine.name());
        }
    }


    /** Every protobuf constant, except the UNRECOGNIZED constant, must be resolvable by name in {@link LayoutEngine}. */
    @Test
    public void assertAllValuesMatchReverse() {

        for (LayoutEngineProto.LayoutEngine protoEngine : LayoutEngineProto.LayoutEngine.values()) {
            // UNRECOGNIZED is skipped; it is not expected to have a counterpart in LayoutEngine.
            if (protoEngine != LayoutEngineProto.LayoutEngine.UNRECOGNIZED) {
                LayoutEngine modelEngine = LayoutEngine.valueOf(protoEngine.name());
                assertEquals(modelEngine.name(), protoEngine.name());
            }
        }
    }

}

View File

@ -0,0 +1,33 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.data.NodeTypeProto;
/**
 * Verifies that the {@link NodeType} enum and the generated protobuf enum
 * {@link NodeTypeProto.NodeType} declare the same constant names, so that values can be
 * converted between the two by name.
 */
public class NodeTypeMappingTest {

    /** Every {@link NodeType} constant must be resolvable by name in the protobuf enum. */
    @Test
    public void assertAllValuesMatch() {

        for (NodeType modelType : NodeType.values()) {
            NodeTypeProto.NodeType protoType = NodeTypeProto.NodeType.valueOf(modelType.name());
            assertEquals(protoType.name(), modelType.name());
        }
    }


    /** Every protobuf constant, except the UNRECOGNIZED constant, must be resolvable by name in {@link NodeType}. */
    @Test
    public void assertAllValuesMatchReverse() {

        for (NodeTypeProto.NodeType protoType : NodeTypeProto.NodeType.values()) {
            // UNRECOGNIZED is skipped; it is not expected to have a counterpart in NodeType.
            if (protoType != NodeTypeProto.NodeType.UNRECOGNIZED) {
                NodeType modelType = NodeType.valueOf(protoType.name());
                assertEquals(modelType.name(), protoType.name());
            }
        }
    }

}

View File

@ -0,0 +1,144 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.List;
import org.junit.jupiter.api.Test;
// Tests for parsing section identifiers from headline text (SectionIdentifier.fromSearchText)
// and for the parent/child relations between the parsed identifiers.
class SectionIdentifierTest {
// A purely numeric prefix "1.1.2" is expected to parse as NUMERICAL with three levels;
// an identifier created via asChildOf is a child of its source, and "1.1" is a parent of "1.1.2".
@Test
void testSectionIdentifier() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("1.1.2: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(3, identifier.level());
assertEquals(List.of(1, 1, 2), identifier.getIdentifiers());
SectionIdentifier child = SectionIdentifier.asChildOf(identifier);
assertTrue(child.isChildOf(identifier));
SectionIdentifier parent = SectionIdentifier.fromSearchText("1.1: Headline");
assertTrue(parent.isParentOf(identifier));
}
// A leading letter makes the format ALPHANUMERIC; the letter "A" is expected as identifier 1.
@Test
void testSectionIdentifier2() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("A.1.2: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(3, identifier.level());
assertEquals(List.of(1, 1, 2), identifier.getIdentifiers());
}
// The letter "D" is expected as identifier 4.
@Test
void testSectionIdentifier3() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("D.1.2: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(3, identifier.level());
assertEquals(List.of(4, 1, 2), identifier.getIdentifiers());
}
// Four numeric components are all retained.
@Test
void testSectionIdentifier4() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("4.1.2.4: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// Five components in the text: only the first four are expected in the identifier (level stays 4).
@Test
void testSectionIdentifier5() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("D.1.2.4.5: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// A lower-case leading letter is expected to yield the same result as its upper-case form.
@Test
void testSectionIdentifier6() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("d.1.2.4.5: Headline");
assertEquals(SectionIdentifier.Format.ALPHANUMERIC, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// Same four-component limit for a purely numeric identifier with five components.
@Test
void testSectionIdentifier7() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("4.1.2.4.5: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(4, identifier.level());
assertEquals(List.of(4, 1, 2, 4), identifier.getIdentifiers());
}
// "111" without separators is a single-level identifier, not three levels of "1".
@Test
void testFalsePositive111() {
SectionIdentifier identifier = SectionIdentifier.fromSearchText("111: Headline");
assertEquals(SectionIdentifier.Format.NUMERICAL, identifier.getFormat());
assertEquals(1, identifier.level());
}
// Pins the expected parent/child relations across numeric, alphanumeric and identifier-less headlines.
@Test
public void testParentOf() {
var headline = SectionIdentifier.fromSearchText("1 Did you ever hear the tragedy of Darth Plagueis The Wise?");
var headline1 = SectionIdentifier.fromSearchText("1.0 I thought not. Its not a story the Jedi would tell you.");
var headline2 = SectionIdentifier.fromSearchText("1.1 Its a Sith legend. Darth Plagueis was a Dark Lord of the Sith, ");
var headline3 = SectionIdentifier.fromSearchText("1.2.3 so powerful and so wise he could use the Force to influence the midichlorians to create life…");
var headline4 = SectionIdentifier.fromSearchText("1.2.3.4 He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.");
var headline5 = SectionIdentifier.fromSearchText("1.2.3.4.5 The dark side of the Force is a pathway to many abilities some consider to be unnatural.");
var headline6 = SectionIdentifier.fromSearchText("2.0 He became so powerful…");
var headline7 = SectionIdentifier.fromSearchText("10000.0 the only thing he was afraid of was losing his power,");
var headline8 = SectionIdentifier.fromSearchText("A.0 which eventually, of course, he did.");
var headline9 = SectionIdentifier.fromSearchText("Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.");
var headline10 = SectionIdentifier.fromSearchText("2.1.2 Ironic.");
var headline11 = SectionIdentifier.fromSearchText("2.He could save others from death,");
var headline12 = SectionIdentifier.fromSearchText(" 2. but not himself.");
// An identifier derived via asChildOf is a child of its source, and never its parent.
var paragraph1 = SectionIdentifier.asChildOf(headline);
assertTrue(paragraph1.isChildOf(headline));
assertTrue(headline.isParentOf(paragraph1));
assertFalse(paragraph1.isParentOf(headline));
// "1" is expected NOT to be a parent of "1.0", but to be a parent of deeper "1.x" headlines.
assertFalse(headline.isParentOf(headline1));
assertTrue(headline.isParentOf(headline2));
assertTrue(headline.isParentOf(headline3));
assertTrue(headline.isParentOf(headline4));
assertTrue(headline.isParentOf(headline5));
// "1.0" is expected to act as a parent of the other "1.x" headlines, but not of itself.
assertTrue(headline1.isParentOf(headline2));
assertFalse(headline1.isParentOf(headline1));
assertTrue(headline3.isParentOf(headline4));
// "1.2.3.4" vs. "1.2.3.4.5": no parent relation expected (see the five-component cases above).
assertFalse(headline4.isParentOf(headline5));
assertFalse(headline2.isParentOf(headline3));
assertFalse(headline2.isParentOf(headline4));
assertTrue(headline1.isParentOf(headline3));
assertTrue(headline1.isParentOf(headline4));
// Headlines with a different first component are unrelated.
assertFalse(headline1.isParentOf(headline6));
assertFalse(headline1.isParentOf(headline7));
// The alphanumeric "A.0" is expected not to be a parent of the numeric "1.x" headlines.
assertFalse(headline8.isParentOf(headline1));
assertFalse(headline8.isParentOf(headline2));
assertFalse(headline8.isParentOf(headline3));
assertFalse(headline8.isParentOf(headline4));
// A headline without any identifier is not its own parent.
assertFalse(headline9.isParentOf(headline9));
// "2." directly followed by text, or preceded by whitespace, is still expected to act as the parent of "2.1.2".
assertTrue(headline10.isChildOf(headline11));
assertTrue(headline10.isChildOf(headline12));
}
}

View File

@ -4,7 +4,7 @@ plugins {
}
description = "redaction-service-api-v1"
val persistenceServiceVersion = "2.473.0"
val persistenceServiceVersion = "2.631.0"
dependencies {
implementation("org.springframework:spring-web:6.0.12")

View File

@ -2,12 +2,18 @@ package com.iqser.red.service.redaction.v1.model;
public class QueueNames {
public static final String REDACTION_QUEUE = "redactionQueue";
public static final String REDACTION_PRIORITY_QUEUE = "redactionPriorityQueue";
public static final String REDACTION_ANALYSIS_RESPONSE_QUEUE = "redactionAnalysisResponseQueue";
public static final String REDACTION_DQL = "redactionDQL";
public static final String REDACTION_REQUEST_QUEUE_PREFIX = "redaction_request";
public static final String REDACTION_REQUEST_EXCHANGE = "redaction_request_exchange";
public static final String REDACTION_PRIORITY_REQUEST_QUEUE_PREFIX = "redaction_priority_request";
public static final String REDACTION_PRIORITY_REQUEST_EXCHANGE = "redaction_priority_request_exchange";
public static final String REDACTION_RESPONSE_EXCHANGE = "redaction_response_exchange";
public static final String REDACTION_DLQ = "redaction_error";
public static final String MIGRATION_QUEUE = "migrationQueue";
public static final String SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE = "search_bulk_local_term_response_exchange";
public static final String SEARCH_BULK_LOCAL_TERM_DLQ = "search_bulk_local_term_error";
public static final String MIGRATION_REQUEST_QUEUE = "migrationQueue";
public static final String MIGRATION_RESPONSE_QUEUE = "migrationResponseQueue";
public static final String MIGRATION_DLQ = "migrationDLQ";

View File

@ -12,11 +12,12 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.141.0"
val layoutParserVersion = "0.193.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.492.0"
val persistenceServiceVersion = "2.641.0"
val llmServiceVersion = "1.20.0-RED10072.2"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"
val testContainersVersion = "1.19.7"
@ -33,23 +34,31 @@ configurations {
dependencies {
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
implementation(project(":document"))
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") }
implementation("com.iqser.red.service:persistence-service-shared-mongo-v1:${persistenceServiceVersion}")
{
exclude(group = "com.knecon.fforesight", module = "tenant-commons")
}
implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")
implementation("com.knecon.fforesight:llm-service-api:${llmServiceVersion}")
implementation("com.iqser.red.commons:spring-commons:6.2.0")
implementation("com.iqser.red.commons:metric-commons:2.3.0")
implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
implementation("com.iqser.red.commons:storage-commons:2.45.0")
implementation("com.knecon.fforesight:keycloak-commons:0.29.0")
implementation("com.knecon.fforesight:tenant-commons:0.25.0")
implementation("com.iqser.red.commons:storage-commons:2.50.0")
implementation("com.knecon.fforesight:tenant-commons:0.31.0")
implementation("com.knecon.fforesight:keycloak-commons:0.30.0") {
exclude(group = "com.knecon.fforesight", module = "tenant-commons")
}
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
implementation("com.knecon.fforesight:lifecycle-commons:0.6.0")
implementation("com.knecon.fforesight:lifecycle-commons:0.7.0")
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
implementation("org.ahocorasick:ahocorasick:0.6.3")
implementation("org.ahocorasick:ahocorasick:0.9.0")
implementation("com.hankcs:aho-corasick-double-array-trie:1.2.2")
implementation("com.github.roklenarcic:aho-corasick:1.2")
implementation("org.javassist:javassist:3.29.2-GA")
implementation("org.drools:drools-engine:${droolsVersion}")
@ -68,8 +77,11 @@ dependencies {
implementation("org.apache.tomcat:tomcat-websocket:${tomcatVersion}")
implementation("org.apache.tomcat.embed:tomcat-embed-core:${tomcatVersion}")
implementation("org.liquibase:liquibase-core:4.29.2") // Needed to be set explicit, otherwise spring dependency management sets it to 4.20.0
implementation("org.liquibase.ext:liquibase-mongodb:4.29.2")
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
implementation("ch.qos.logback:logback-classic")
api("ch.qos.logback:logback-classic")
implementation("org.reflections:reflections:0.10.2")
@ -90,7 +102,12 @@ dependencies {
group = "com.iqser.red.service",
module = "persistence-service-shared-api-v1"
)
exclude(
group = "com.knecon.fforesight",
module = "document"
)
}
testImplementation("com.pdftron:PDFNet:10.11.0")
}
dependencyManagement {
@ -115,6 +132,7 @@ tasks.named<BootBuildImage>("bootBuildImage") {
"BPE_APPEND_JAVA_TOOL_OPTIONS",
"-XX:MaxMetaspaceSize=1g -Dfile.encoding=UTF-8 -Dkie.repository.project.cache.size=50 -Dkie.repository.project.versions.cache.size=5"
)
environment.put("BPE_DEFAULT_LANG", "en_US.utf8") // java.text.Normalizer does not care for file.encoding
imageName.set("nexus.knecon.com:5001/red/${project.name}")// must build image with same name always, otherwise the builder will not know which image to use as cache. DO NOT CHANGE!
if (project.hasProperty("buildbootDockerHostNetwork")) {
@ -138,18 +156,19 @@ tasks.named<BootBuildImage>("bootBuildImage") {
}
}
fun parseDroolsImports(droolsFilePath: String): List<String> {
fun parseDroolsImports(vararg droolsFilePaths: String): List<String> {
val imports = mutableListOf<String>()
val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);")
val desiredPrefix = "com.iqser.red.service.redaction.v1"
File(droolsFilePath).forEachLine { line ->
importPattern.find(line)?.let { matchResult ->
val importPath = matchResult.groupValues[1].trim()
if (importPath.startsWith(desiredPrefix)) {
val formattedPath = importPath.replace('.', '/')
imports.add("$formattedPath.java")
droolsFilePaths.forEach { filePath ->
File(filePath).forEachLine { line ->
importPattern.find(line)?.let { matchResult ->
val importPath = matchResult.groupValues[1].trim()
if (importPath.startsWith(desiredPrefix)) {
val formattedPath = importPath.replace('.', '/')
imports.add("$formattedPath.java")
}
}
}
}
@ -157,22 +176,30 @@ fun parseDroolsImports(droolsFilePath: String): List<String> {
return imports
}
val droolsImports = parseDroolsImports("redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl")
// Combine imports from both drools files
val droolsImports = parseDroolsImports(
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl",
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_component_rules.drl"
)
tasks.register("generateJavaDoc", Javadoc::class) {
dependsOn("compileJava")
dependsOn("delombok")
classpath = project.sourceSets["main"].runtimeClasspath
source = fileTree("${buildDir}/generated/sources/delombok/java/main") {
val documentFiles = fileTree("${project(":document").layout.buildDirectory.get()}/generated/sources/delombok/java/main") {
include(droolsImports)
}
destinationDir = file(project.findProperty("javadocDestinationDir")?.toString() ?: "")
val mainFiles = fileTree("${layout.buildDirectory.get()}/generated/sources/delombok/java/main") {
include(droolsImports)
}
source = documentFiles + mainFiles
setDestinationDir(file(project.findProperty("javadocDestinationDir")?.toString() ?: ""))
options.memberLevel = JavadocMemberLevel.PUBLIC
(options as StandardJavadocDocletOptions).apply {
header = "Redaction Service ${project.version}"
footer = "Redaction Service ${project.version}"
title = "API Documentation for Redaction Service ${project.version}"
}
}

View File

@ -10,11 +10,13 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.reflections.Reflections;
import org.reflections.scanners.Scanners;
import org.reflections.util.ConfigurationBuilder;
import org.reflections.util.FilterBuilder;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
@ -25,6 +27,8 @@ import lombok.extern.slf4j.Slf4j;
public class DeprecatedElementsFinder {
public static final String PACKAGE_NAME = "com.iqser.red.service.redaction.v1.server";
public static final Pattern DATA_PACKAGE = Pattern.compile(".*/data/.*");
private Set<Method> deprecatedMethods;
@Getter
private Map<String, String> deprecatedMethodsSignaturesMap;
@ -43,7 +47,10 @@ public class DeprecatedElementsFinder {
Reflections reflections = new Reflections(new ConfigurationBuilder().forPackage(PACKAGE_NAME)
.setExpandSuperTypes(true)
.setScanners(Scanners.MethodsAnnotated, Scanners.TypesAnnotated, Scanners.SubTypes));
.setScanners(Scanners.MethodsAnnotated, Scanners.TypesAnnotated, Scanners.SubTypes)
.filterInputsBy(new FilterBuilder().includePackage(PACKAGE_NAME).excludePackage(PACKAGE_NAME + ".data")
// Exclude the generated proto data package
));
deprecatedMethods = reflections.get(Scanners.MethodsAnnotated.with(Deprecated.class).as(Method.class));

View File

@ -22,18 +22,28 @@ public class RedactionServiceSettings {
private boolean nerServiceEnabled = true;
private boolean azureNerServiceEnabled;
private boolean llmNerServiceEnabled;
private boolean priorityMode;
private long firstLevelDictionaryCacheMaximumSize = 1000;
private long dictionaryCacheMaximumSize = 100;
private int dictionaryCacheExpireAfterAccessDays = 3;
private int droolsExecutionTimeoutSecs = 300;
private int droolsExecutionTimeoutSecs = 600;
private boolean ruleExecutionSecured = true;
private boolean annotationMode;
private boolean droolsDebug;
private boolean protobufJsonFallback = true;
public int getDroolsExecutionTimeoutSecs(int numberOfPages) {

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.redaction.v1.server.client;
import org.springframework.cloud.openfeign.FeignClient;
import com.iqser.red.service.persistence.service.v1.api.internal.resources.DateFormatsResource;
/**
 * Feign client for the {@link DateFormatsResource} API of the persistence service.
 * The interface declares no methods of its own; all endpoints are inherited from {@link DateFormatsResource}.
 * The base URL is taken from the {@code persistence-service.url} property.
 */
@FeignClient(name = "DateFormatsResource", url = "${persistence-service.url}")
public interface DateFormatsClient extends DateFormatsResource {
}

View File

@ -15,5 +15,6 @@ public class EntityRecognitionEntity {
private int startOffset;
private int endOffset;
private String type;
private Double confidence;
}

View File

@ -17,7 +17,20 @@ public final class Context {
private String dossierTemplateId;
@Setter
private long ruleVersion;
@Setter
private long dateFormatsVersion;
private int analysisNumber;
private String tenantId;
/**
 * Creates a context from all values except the date-formats version.
 * <p>
 * {@code dateFormatsVersion} is not assigned by this constructor and therefore keeps the Java default
 * of {@code 0} until it is set through its setter.
 *
 * @param fileId            value stored in {@code fileId}
 * @param dossierId         value stored in {@code dossierId}
 * @param dossierTemplateId value stored in {@code dossierTemplateId}
 * @param ruleVersion       value stored in {@code ruleVersion}
 * @param analysisNumber    value stored in {@code analysisNumber}
 * @param tenantId          value stored in {@code tenantId}
 */
public Context(String fileId, String dossierId, String dossierTemplateId, long ruleVersion, int analysisNumber, String tenantId) {
this.fileId = fileId;
this.dossierId = dossierId;
this.dossierTemplateId = dossierTemplateId;
this.ruleVersion = ruleVersion;
this.analysisNumber = analysisNumber;
this.tenantId = tenantId;
}
}

View File

@ -0,0 +1,70 @@
package com.iqser.red.service.redaction.v1.server.logger;
import org.kie.api.definition.rule.Rule;
import org.kie.api.event.rule.DefaultRuleRuntimeEventListener;
import org.kie.api.event.rule.ObjectDeletedEvent;
import org.kie.api.event.rule.ObjectInsertedEvent;
import org.kie.api.event.rule.ObjectUpdatedEvent;
import lombok.AllArgsConstructor;
/**
 * Rule runtime event listener that reports inserted, deleted and updated objects to the
 * {@link RulesLogger} while object tracking is active on that logger.
 */
@AllArgsConstructor
public class ObjectTrackingEventListener extends DefaultRuleRuntimeEventListener {

    /** Rule names longer than this are shortened by {@link #formatRuleName(Rule)}. */
    private static final int MAX_RULE_NAME_LENGTH = 20;

    RulesLogger logger;


    /** Reports an inserted object, prefixed with the (shortened) rule name when the event carries a rule. */
    @Override
    public void objectInserted(ObjectInsertedEvent event) {

        if (logger.isObjectTrackingActive()) {
            report(event.getRule(), event.getObject(), "ObjectInsertedEvent:{} has been inserted", "ObjectInsertedEvent:{}: {} has been inserted");
        }
    }


    /** Reports a deleted object (the event's old object), prefixed with the rule name when the event carries a rule. */
    @Override
    public void objectDeleted(ObjectDeletedEvent event) {

        if (logger.isObjectTrackingActive()) {
            report(event.getRule(), event.getOldObject(), "ObjectDeletedEvent: {} has been deleted", "ObjectDeletedEvent: {}: {} has been deleted");
        }
    }


    /** Reports an updated object, prefixed with the (shortened) rule name when the event carries a rule. */
    @Override
    public void objectUpdated(ObjectUpdatedEvent event) {

        if (logger.isObjectTrackingActive()) {
            report(event.getRule(), event.getObject(), "ObjectUpdatedEvent:{} has been updated", "ObjectUpdatedEvent:{}: {} has been updated");
        }
    }


    /**
     * Returns the rule name, cut to its first 20 characters followed by {@code "..."} when it is longer than that.
     *
     * @param rule the rule whose name is formatted
     * @return the possibly shortened rule name
     */
    public static String formatRuleName(Rule rule) {

        String ruleName = rule.getName();
        return ruleName.length() > MAX_RULE_NAME_LENGTH ? ruleName.substring(0, MAX_RULE_NAME_LENGTH) + "..." : ruleName;
    }


    /** Logs the subject with the rule-less format when no rule is attached, otherwise with the rule format. */
    private void report(Rule rule, Object subject, String formatWithoutRule, String formatWithRule) {

        if (rule != null) {
            logger.logObjectTracking(formatWithRule, formatRuleName(rule), subject);
        } else {
            logger.logObjectTracking(formatWithoutRule, subject);
        }
    }

}

View File

@ -1,30 +1,141 @@
package com.iqser.red.service.redaction.v1.server.logger;
import java.time.OffsetDateTime;
import java.util.regex.Pattern;
import org.slf4j.helpers.MessageFormatter;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * This class provides logging functionality specifically for rules execution
 * in a Drools context. It is designed to log messages with different log levels
 * (INFO, WARN, ERROR) and formats messages using a placeholder-based approach
 * similar to popular logging frameworks like SLF4J.
 * <p>
 * Log messages can include placeholders (i.e., {@code {}}), which will be replaced by
 * the corresponding arguments when the message is formatted.
 * <p>
 * Example usage:
 * <pre>
 * logger.info("Message with placeholder {}", object);
 * </pre>
 */
@Slf4j
@RequiredArgsConstructor
public class RulesLogger {
private final WebSocketService webSocketService;
private final Context context;
@Getter
private boolean objectTrackingActive;
@Getter
private boolean agendaTrackingActive;
/**
* Logs a message at the INFO level.
*
* @param message The log message containing optional placeholders (i.e., `{}`).
* @param args The arguments to replace the placeholders in the message.
*/
public void info(String message, Object... args) {
log(LogLevel.INFO, message, args);
}
/**
* Logs a message at the WARN level.
*
* @param message The log message containing optional placeholders (i.e., `{}`).
* @param args The arguments to replace the placeholders in the message.
*/
public void warn(String message, Object... args) {
log(LogLevel.WARN, message, args);
}
/**
* Logs a message at the INFO level, if object tracking has been activated.
*
* @param message The log message containing optional placeholders (i.e., `{}`).
* @param args The arguments to replace the placeholders in the message.
*/
public void logObjectTracking(String message, Object... args) {
if (objectTrackingActive) {
info(message, args);
}
}
/**
* If object tracking is enabled, the RulesLogger will log all inserted/retracted/updated events.
* Initial value is disabled.
*/
public void enableObjectTracking() {
objectTrackingActive = true;
}
/**
* If object tracking is disabled, the RulesLogger won't log any inserted/retracted/updated events.
* Initial value is disabled.
*/
public void disableObjectTracking() {
objectTrackingActive = false;
}
/**
* Logs a message at the INFO level, if agenda tracking has been activated.
*
* @param message The log message containing optional placeholders (i.e., `{}`).
* @param args The arguments to replace the placeholders in the message.
*/
public void logAgendaTracking(String message, Object... args) {
if (agendaTrackingActive) {
info(message, args);
}
}
/**
* If agenda tracking is enabled, the RulesLogger will log each firing Rule with its name, objects and metadata.
* Initial value is disabled.
*/
public void enableAgendaTracking() {
agendaTrackingActive = true;
}
/**
* If agenda tracking is disabled, the RulesLogger won't log any rule firings.
* Initial value is disabled.
*/
public void disableAgendaTracking() {
agendaTrackingActive = false;
}
/**
* Logs a message at the ERROR level, including an exception.
*
* @param throwable The exception to log.
* @param message The log message containing optional placeholders (i.e., `{}`).
* @param args The arguments to replace the placeholders in the message.
*/
public void error(Throwable throwable, String message, Object... args) {
log(LogLevel.ERROR, message + " Exception: " + throwable.toString(), args);
@ -34,6 +145,11 @@ public class RulesLogger {
private void log(LogLevel logLevel, String message, Object... args) {
var formattedMessage = formatMessage(message, args);
switch (logLevel) {
case INFO -> log.info(message, args);
case WARN -> log.warn(message, args);
case ERROR -> log.error(message, args);
}
var ruleLog = RuleLogEvent.builder()
.tenantId(context.getTenantId())
.ruleVersion(context.getRuleVersion())
@ -52,22 +168,7 @@ public class RulesLogger {
private String formatMessage(String message, Object... args) {
if (args == null || args.length == 0) {
return message;
}
var pattern = Pattern.compile("\\{}");
var matcher = pattern.matcher(message);
var sb = new StringBuilder();
int i = 0;
while (matcher.find() && i < args.length) {
matcher.appendReplacement(sb, args[i] != null ? args[i].toString() : "null");
i++;
}
matcher.appendTail(sb);
return sb.toString();
return MessageFormatter.arrayFormat(message, args).getMessage();
}
}

View File

@ -0,0 +1,65 @@
package com.iqser.red.service.redaction.v1.server.logger;
import java.util.Map;
import org.kie.api.definition.rule.Rule;
import org.kie.api.event.rule.AfterMatchFiredEvent;
import org.kie.api.event.rule.DefaultAgendaEventListener;
import org.kie.api.event.rule.MatchCreatedEvent;
import lombok.AllArgsConstructor;
/**
 * Agenda event listener that reports created and fired matches through a {@link RulesLogger}.
 * Nothing is reported while the logger's agenda tracking is inactive.
 */
@AllArgsConstructor
public class TrackingAgendaEventListener extends DefaultAgendaEventListener {

    private RulesLogger logger;


    /**
     * Logs the string form of the event when agenda tracking is active.
     *
     * @param event the match-created event to report
     */
    @Override
    public void matchCreated(MatchCreatedEvent event) {
        if (!logger.isAgendaTrackingActive()) {
            return;
        }
        logger.logAgendaTracking(event.toString());
    }


    /**
     * Logs the fired rule's name, the objects of the match and the rule's meta-data when agenda tracking is active.
     *
     * @param event the after-match-fired event to report
     */
    @Override
    public void afterMatchFired(AfterMatchFiredEvent event) {
        if (logger.isAgendaTrackingActive()) {
            logger.logAgendaTracking(describeFiredMatch(event));
        }
    }


    /**
     * Builds the log text for a fired match: rule name, then the matched objects (if any), then one line per
     * meta-data entry (if any).
     */
    private static String describeFiredMatch(AfterMatchFiredEvent event) {
        var match = event.getMatch();
        Rule firedRule = match.getRule();
        StringBuilder text = new StringBuilder("AfterMatchFiredEvent: ").append(formatRuleName(firedRule));
        Map<String, Object> metaData = firedRule.getMetaData();

        var matchedObjects = match.getObjects();
        if (matchedObjects != null && !matchedObjects.isEmpty()) {
            text.append(", ").append(matchedObjects.size()).append(" objects: ");
            // Emit the separator in front of every element but the first, so nothing has to be trimmed afterwards.
            String separator = "";
            for (Object matchedObject : matchedObjects) {
                text.append(separator).append(matchedObject);
                separator = ", ";
            }
        }

        if (!metaData.isEmpty()) {
            text.append("\n With [").append(metaData.size()).append("] meta-data:");
            for (Map.Entry<String, Object> metaDataEntry : metaData.entrySet()) {
                text.append("\n key=").append(metaDataEntry.getKey()).append(", value=").append(metaDataEntry.getValue());
            }
        }
        return text.toString();
    }


    /**
     * Formats a rule name by delegating to {@link ObjectTrackingEventListener#formatRuleName(Rule)}.
     *
     * @param rule the rule whose name is formatted
     * @return the formatted rule name
     */
    public static String formatRuleName(Rule rule) {
        return ObjectTrackingEventListener.formatRuleName(rule);
    }
}

View File

@ -1,438 +0,0 @@
package com.iqser.red.service.redaction.v1.server.migration;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Change;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class LegacyRedactionLogMergeService {
private final DictionaryService dictionaryService;
/**
 * Merges manual redactions into the given redaction log and prunes entries that must not be kept.
 * <p>
 * If {@code manualRedactions} is non-null, log entries created from its entries-to-add are appended and every
 * imported entry gets the approved manual changes targeting its id applied; imported entries that are not
 * redacted afterwards are remembered as skipped. Afterwards the log drops false positives, entries that still
 * intersect a non-skipped imported redaction (unless they are images of type "image" or "ocr") and entries
 * whose id has already been seen.
 *
 * @param redactionLog      the log to modify in place
 * @param manualRedactions  the manual redactions to merge, may be {@code null}
 * @param dossierTemplateId handed on to the per-entry processing
 * @return the same {@code redactionLog} instance
 */
public RedactionLog addManualAddEntriesAndRemoveSkippedImported(RedactionLog redactionLog, ManualRedactions manualRedactions, String dossierTemplateId) {
    Set<String> skippedImportedIds = new HashSet<>();
    log.info("Adding manual add Entries and removing skipped or imported entries");

    if (manualRedactions != null) {
        List<RedactionLogEntry> manualEntries = addManualAddEntries(manualRedactions.getEntriesToAdd(), redactionLog.getAnalysisNumber());
        redactionLog.getRedactionLogEntry().addAll(manualEntries);

        List<ManualRedactionWrapper> wrappers = createManualRedactionWrappers(manualRedactions);
        for (RedactionLogEntry importedCandidate : redactionLog.getRedactionLogEntry()) {
            if (!importedCandidate.isImported()) {
                continue;
            }
            // Only approved manual changes addressed to this entry are applied.
            List<ManualRedactionWrapper> approvedForEntry = new ArrayList<>();
            for (ManualRedactionWrapper wrapper : wrappers) {
                if (wrapper.isApproved() && importedCandidate.getId().equals(wrapper.getId())) {
                    approvedForEntry.add(wrapper);
                }
            }
            processRedactionLogEntry(approvedForEntry, importedCandidate, dossierTemplateId);
            if (!importedCandidate.isRedacted()) {
                skippedImportedIds.add(importedCandidate.getId());
            }
        }
    }

    Set<String> seenIds = new HashSet<>();
    redactionLog.getRedactionLogEntry().removeIf(candidate -> {
        if (candidate.isFalsePositive()) {
            return true;
        }

        if (candidate.getImportedRedactionIntersections() != null) {
            // Intersections with skipped imported redactions no longer count.
            candidate.getImportedRedactionIntersections().removeAll(skippedImportedIds);
            boolean stillIntersects = !candidate.getImportedRedactionIntersections().isEmpty();
            if (stillIntersects && (!candidate.isImage() || !candidate.getType().equals("image") && !candidate.getType().equals("ocr"))) {
                return true;
            }
        }

        // Set.add returns false when the id is already present, i.e. the entry is a duplicate.
        if (!seenIds.add(candidate.getId())) {
            log.info("Duplicate annotation found with id {}", candidate.getId());
            return true;
        }
        return false;
    });

    return redactionLog;
}
/**
 * Counts the distinct annotation ids referenced by the wrappers built from the given manual redactions.
 *
 * @param manualRedactions the manual redactions to inspect
 * @return the number of distinct annotation ids
 */
public long getNumberOfAffectedAnnotations(ManualRedactions manualRedactions) {
    Set<String> annotationIds = new HashSet<>();
    for (ManualRedactionWrapper wrapper : createManualRedactionWrappers(manualRedactions)) {
        annotationIds.add(wrapper.getId());
    }
    return annotationIds.size();
}
/**
 * Wraps every recategorization, id removal, force redaction, legal basis change and resize redaction that has
 * no soft-deleted time into a {@link ManualRedactionWrapper} and returns the wrappers sorted by their natural
 * order (the request date).
 *
 * @param manualRedactions the source of the manual changes
 * @return a new, sorted list of wrappers
 */
private List<ManualRedactionWrapper> createManualRedactionWrappers(ManualRedactions manualRedactions) {
    List<ManualRedactionWrapper> wrappers = new ArrayList<>();

    for (var recategorization : manualRedactions.getRecategorizations()) {
        if (recategorization.getSoftDeletedTime() == null) {
            wrappers.add(new ManualRedactionWrapper(recategorization.getAnnotationId(), recategorization.getRequestDate(), recategorization, recategorization.isApproved()));
        }
    }

    for (var removal : manualRedactions.getIdsToRemove()) {
        if (removal.getSoftDeletedTime() == null) {
            wrappers.add(new ManualRedactionWrapper(removal.getAnnotationId(), removal.getRequestDate(), removal, removal.isApproved()));
        }
    }

    for (var forceRedaction : manualRedactions.getForceRedactions()) {
        if (forceRedaction.getSoftDeletedTime() == null) {
            wrappers.add(new ManualRedactionWrapper(forceRedaction.getAnnotationId(), forceRedaction.getRequestDate(), forceRedaction, forceRedaction.isApproved()));
        }
    }

    for (var legalBasisChange : manualRedactions.getLegalBasisChanges()) {
        if (legalBasisChange.getSoftDeletedTime() == null) {
            wrappers.add(new ManualRedactionWrapper(legalBasisChange.getAnnotationId(), legalBasisChange.getRequestDate(), legalBasisChange, legalBasisChange.isApproved()));
        }
    }

    for (var resize : manualRedactions.getResizeRedactions()) {
        if (resize.getSoftDeletedTime() == null) {
            wrappers.add(new ManualRedactionWrapper(resize.getAnnotationId(), resize.getRequestDate(), resize, resize.isApproved()));
        }
    }

    Collections.sort(wrappers);
    return wrappers;
}
/**
 * Applies each wrapped manual change to the given log entry, in list order, dispatching on the runtime type
 * of the wrapped item.
 *
 * @param manualRedactionWrappers the manual changes to apply
 * @param redactionLogEntry       the entry that is modified
 * @param dossierTemplateId       handed on to the handlers that need it
 */
private void processRedactionLogEntry(List<ManualRedactionWrapper> manualRedactionWrappers, RedactionLogEntry redactionLogEntry, String dossierTemplateId) {
    for (ManualRedactionWrapper wrapper : manualRedactionWrappers) {
        Object manualItem = wrapper.getItem();

        if (manualItem instanceof ManualRecategorization recategorization) {
            processManualImageRecategorization(redactionLogEntry, dossierTemplateId, recategorization);
        }
        if (manualItem instanceof IdRemoval removal) {
            processIdRemoval(redactionLogEntry, removal);
        }
        if (manualItem instanceof ManualForceRedaction forceRedaction) {
            processManualForceRedaction(redactionLogEntry, dossierTemplateId, forceRedaction);
        }
        if (manualItem instanceof ManualLegalBasisChange legalBasisChange) {
            processManualLegalBasisChange(redactionLogEntry, legalBasisChange);
        }
        if (manualItem instanceof ManualResizeRedaction resize) {
            processManualResizeRedaction(redactionLogEntry, resize);
        }
    }
}
/**
 * Applies a manual recategorization to a log entry.
 * <p>
 * An approved recategorization changes the entry's type and section, marks it as hint (and not redacted) when
 * the new type is a hint according to the dictionary service, and extends the reason. A requested one only
 * extends the reason. In every case a RECATEGORIZE manual change is recorded on the entry.
 *
 * @param redactionLogEntry     the entry that is modified
 * @param dossierTemplateId     the dossier template used for the hint lookup
 * @param imageRecategorization the manual recategorization to apply
 */
private void processManualImageRecategorization(RedactionLogEntry redactionLogEntry, String dossierTemplateId, ManualRecategorization imageRecategorization) {
    var status = imageRecategorization.getStatus();

    if (status.equals(AnnotationStatus.APPROVED)) {
        redactionLogEntry.setType(imageRecategorization.getType());
        redactionLogEntry.setSection("Image:" + redactionLogEntry.getType());

        boolean newTypeIsHint = dictionaryService.isHint(imageRecategorization.getType(), dossierTemplateId);
        if (newTypeIsHint) {
            redactionLogEntry.setRedacted(false);
        }
        redactionLogEntry.setHint(newTypeIsHint);

        redactionLogEntry.setReason(mergeReasonIfNecessary(redactionLogEntry.getReason(), ", recategorized by manual override"));
    } else if (status.equals(AnnotationStatus.REQUESTED)) {
        redactionLogEntry.setReason(mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to recategorize"));
    }

    ManualChange recategorizeChange = ManualChange.from(imageRecategorization)
            .withManualRedactionType(ManualRedactionType.RECATEGORIZE)
            .withChange("type", imageRecategorization.getType())
            .withChange("section", imageRecategorization.getSection())
            .withChange("legalBasis", imageRecategorization.getLegalBasis())
            .withChange("value", imageRecategorization.getValue());
    redactionLogEntry.getManualChanges().add(recategorizeChange);
}
/**
 * Appends {@code addition} to {@code currentReason} unless the reason already contains it.
 *
 * @param currentReason the existing reason, may be {@code null}
 * @param addition      the text to append
 * @return the (possibly extended) reason; an empty string when {@code currentReason} is {@code null}.
 *         NOTE(review): the addition is dropped in the {@code null} case - confirm this is intended.
 */
private String mergeReasonIfNecessary(String currentReason, String addition) {
    if (currentReason == null) {
        return "";
    }
    return currentReason.contains(addition) ? currentReason : currentReason + addition;
}
/**
 * Applies a manual removal to a log entry.
 * <p>
 * An approved removal un-redacts the entry, clears its hint flag and extends the reason, except when it removes
 * from the dictionary and the entry is based on the dictionary engine only; that case is merely logged. A
 * requested removal only extends the reason. In every case a manual change of type REMOVE_FROM_DICTIONARY or
 * REMOVE_LOCALLY is recorded on the entry.
 *
 * @param redactionLogEntry the entry that is modified
 * @param manualRemoval     the manual removal to apply
 */
private void processIdRemoval(RedactionLogEntry redactionLogEntry, IdRemoval manualRemoval) {
    var status = manualRemoval.getStatus();
    boolean approved = status.equals(AnnotationStatus.APPROVED);

    if (approved && manualRemoval.isRemoveFromDictionary() && isBasedOnDictionaryOnly(redactionLogEntry)) {
        log.debug("Skipping merge for dictionary-modifying entry");
    } else if (approved) {
        redactionLogEntry.setRedacted(false);
        String extendedReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", removed by manual override");
        redactionLogEntry.setHint(false);
        redactionLogEntry.setReason(extendedReason);
    } else if (status.equals(AnnotationStatus.REQUESTED)) {
        redactionLogEntry.setReason(mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to remove"));
    }

    ManualRedactionType removalType;
    if (manualRemoval.isRemoveFromDictionary()) {
        removalType = ManualRedactionType.REMOVE_FROM_DICTIONARY;
    } else {
        removalType = ManualRedactionType.REMOVE_LOCALLY;
    }
    redactionLogEntry.getManualChanges().add(ManualChange.from(manualRemoval).withManualRedactionType(removalType));
}
/**
 * Tells whether {@link Engine#DICTIONARY} is the one and only engine of the given entry.
 *
 * @param redactionLogEntry the entry to inspect
 * @return {@code true} if the entry's engines consist of exactly the dictionary engine
 */
private boolean isBasedOnDictionaryOnly(RedactionLogEntry redactionLogEntry) {
    var engines = redactionLogEntry.getEngines();
    return engines.size() == 1 && engines.contains(Engine.DICTIONARY);
}
/**
 * Applies a manual force redaction to a log entry.
 * <p>
 * When approved, the entry becomes a hint if its type is a hint according to the dictionary service, otherwise
 * it becomes redacted; reason and legal basis are updated. When requested, only reason and legal basis are
 * updated. In every case a FORCE_HINT or FORCE_REDACT manual change is recorded on the entry.
 *
 * @param redactionLogEntry the entry that is modified
 * @param dossierTemplateId the dossier template used for the hint lookup
 * @param manualForceRedact the manual force redaction to apply
 */
private void processManualForceRedaction(RedactionLogEntry redactionLogEntry, String dossierTemplateId, ManualForceRedaction manualForceRedact) {
    boolean typeIsHint = dictionaryService.isHint(redactionLogEntry.getType(), dossierTemplateId);
    var status = manualForceRedact.getStatus();

    if (status.equals(AnnotationStatus.APPROVED)) {
        // Forcing a skipped hint should result in a hint
        if (typeIsHint) {
            redactionLogEntry.setHint(true);
        } else {
            redactionLogEntry.setRedacted(true);
        }
        String extendedReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", forced by manual override");
        redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
        redactionLogEntry.setReason(extendedReason);
    } else if (status.equals(AnnotationStatus.REQUESTED)) {
        String requestedAction = typeIsHint ? "hint" : "redact";
        String extendedReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to force " + requestedAction);
        redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
        redactionLogEntry.setReason(extendedReason);
    }

    ManualRedactionType forceType = typeIsHint ? ManualRedactionType.FORCE_HINT : ManualRedactionType.FORCE_REDACT;
    redactionLogEntry.getManualChanges().add(ManualChange.from(manualForceRedact).withManualRedactionType(forceType));
}
/**
 * Applies a manual legal basis change to a log entry.
 * <p>
 * When approved, the entry's legal basis is replaced, the entry is marked as redacted, its section is replaced
 * if the change carries one, and - for rectangle entries - its value is replaced if the change carries one.
 * When requested, only the reason is extended. In every case a LEGAL_BASIS_CHANGE manual change holding the
 * changed properties is recorded on the entry.
 *
 * @param redactionLogEntry      the entry that is modified
 * @param manualLegalBasisChange the manual legal basis change to apply
 */
private void processManualLegalBasisChange(RedactionLogEntry redactionLogEntry, ManualLegalBasisChange manualLegalBasisChange) {
    var status = manualLegalBasisChange.getStatus();
    boolean hasSection = manualLegalBasisChange.getSection() != null;
    boolean hasRectangleValue = redactionLogEntry.isRectangle() && manualLegalBasisChange.getValue() != null;

    String manualOverrideReason = null;
    if (status.equals(AnnotationStatus.APPROVED)) {
        manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", legal basis was manually changed");
        redactionLogEntry.setLegalBasis(manualLegalBasisChange.getLegalBasis());
        redactionLogEntry.setRedacted(true);
        if (hasSection) {
            redactionLogEntry.setSection(manualLegalBasisChange.getSection());
        }
        if (hasRectangleValue) {
            redactionLogEntry.setValue(manualLegalBasisChange.getValue());
        }
    } else if (status.equals(AnnotationStatus.REQUESTED)) {
        manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", legal basis change requested");
    }
    if (manualOverrideReason != null) {
        redactionLogEntry.setReason(manualOverrideReason);
    }

    // Always continue with the value returned by withChange(...), as the chained call sites in this class do.
    // NOTE(review): if withChange returns a copy instead of mutating the receiver, ignoring its result would
    // silently drop the change; reassigning is correct under both semantics.
    ManualChange manualChange = ManualChange.from(manualLegalBasisChange)
            .withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE)
            .withChange("legalBasis", manualLegalBasisChange.getLegalBasis());
    if (hasSection) {
        manualChange = manualChange.withChange("section", manualLegalBasisChange.getSection());
    }
    if (hasRectangleValue) {
        manualChange = manualChange.withChange("value", manualLegalBasisChange.getValue());
    }
    redactionLogEntry.getManualChanges().add(manualChange);
}
/**
 * Applies a manual resize to a log entry.
 * <p>
 * For an approved or requested resize the entry's positions are replaced, the text before/after is taken over
 * when the resize carries at least one of them, and the reason is extended. Only an approved resize also
 * replaces the entry's value, and only if the entry's type is neither "signature" nor "logo". For any other
 * status the entry keeps its reason untouched, matching the other manual change handlers. In every case a
 * RESIZE manual change is recorded on the entry.
 *
 * @param redactionLogEntry  the entry that is modified
 * @param manualResizeRedact the manual resize to apply
 */
private void processManualResizeRedaction(RedactionLogEntry redactionLogEntry, ManualResizeRedaction manualResizeRedact) {
    var status = manualResizeRedact.getStatus();
    boolean approved = status.equals(AnnotationStatus.APPROVED);
    boolean requested = !approved && status.equals(AnnotationStatus.REQUESTED);

    if (approved || requested) {
        redactionLogEntry.setPositions(convertPositions(manualResizeRedact.getPositions()));

        boolean valueIsFixed = "signature".equalsIgnoreCase(redactionLogEntry.getType()) || "logo".equalsIgnoreCase(redactionLogEntry.getType());
        if (approved && !valueIsFixed) {
            redactionLogEntry.setValue(manualResizeRedact.getValue());
        }

        // This is for backwards compatibility, now the text after/before is calculated during reanalysis because we need to find dict entries on positions where entries are resized to smaller.
        if (manualResizeRedact.getTextBefore() != null || manualResizeRedact.getTextAfter() != null) {
            redactionLogEntry.setTextBefore(manualResizeRedact.getTextBefore());
            redactionLogEntry.setTextAfter(manualResizeRedact.getTextAfter());
        }

        // The reason is only written inside this branch: writing it unconditionally would overwrite the
        // existing reason with null for every status other than APPROVED and REQUESTED.
        String addition = approved ? ", resized by manual override" : ", requested to resize redact";
        redactionLogEntry.setReason(mergeReasonIfNecessary(redactionLogEntry.getReason(), addition));
    }

    redactionLogEntry.getManualChanges()
            .add(ManualChange.from(manualResizeRedact).withManualRedactionType(ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue()));
}
/**
 * Creates redaction log entries for the manual add entries that qualify according to
 * {@code shouldCreateManualEntry}, copying positions and surrounding text from each manual entry.
 *
 * @param manualAdds     the manual add entries to convert
 * @param analysisNumber the analysis number handed on to the entry creation
 * @return a new list with one log entry per qualifying manual add entry
 */
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds, int analysisNumber) {
    List<RedactionLogEntry> createdEntries = new ArrayList<>();

    for (ManualRedactionEntry manualAdd : manualAdds) {
        if (!shouldCreateManualEntry(manualAdd)) {
            continue;
        }
        RedactionLogEntry created = createRedactionLogEntry(manualAdd, manualAdd.getAnnotationId(), analysisNumber);
        created.setPositions(convertPositions(manualAdd.getPositions()));
        created.setTextBefore(manualAdd.getTextBefore());
        created.setTextAfter(manualAdd.getTextAfter());
        createdEntries.add(created);
    }

    return createdEntries;
}
/**
 * Converts annotation rectangles into redaction log rectangles, keeping top-left point, width, height and page.
 *
 * @param positions the annotation rectangles to convert
 * @return a new list holding one redaction log rectangle per input rectangle, in the same order
 */
private List<Rectangle> convertPositions(List<com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle> positions) {
    List<Rectangle> converted = new ArrayList<>();
    for (var position : positions) {
        Point topLeft = new Point(position.getTopLeftX(), position.getTopLeftY());
        converted.add(new Rectangle(topLeft, position.getWidth(), position.getHeight(), position.getPage()));
    }
    return converted;
}
/**
 * Decides whether a log entry is created for a manual add entry.
 * <p>
 * Unapproved entries never qualify. Approved entries qualify when they target neither the dictionary nor the
 * dossier dictionary, or when they target one of them and have no processed date.
 *
 * @param manualRedactionEntry the manual add entry to check
 * @return {@code true} if a log entry should be created
 */
@SuppressWarnings("PMD.UselessParentheses")
private boolean shouldCreateManualEntry(ManualRedactionEntry manualRedactionEntry) {
    if (!manualRedactionEntry.isApproved()) {
        return false;
    }

    boolean targetsDictionary = manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary();
    if (!targetsDictionary) {
        return true;
    }
    return manualRedactionEntry.getProcessedDate() == null;
}
/**
 * Builds a redacted, non-hint log entry from a manual add entry.
 * <p>
 * The entry carries a single manual change (ADD_TO_DICTIONARY when the manual entry targets the dictionary or
 * the dossier dictionary, ADD_LOCALLY otherwise) and a single ADDED change for analysis number
 * {@code analysisNumber + 1} dated with the manual entry's request date.
 *
 * @param manualRedactionEntry the manual add entry providing the values
 * @param id                   the id of the new log entry
 * @param analysisNumber       the current analysis number
 * @return the new log entry
 */
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id, int analysisNumber) {
    ManualRedactionType addType;
    if (manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary()) {
        addType = ManualRedactionType.ADD_TO_DICTIONARY;
    } else {
        addType = ManualRedactionType.ADD_LOCALLY;
    }

    List<ManualChange> manualChanges = new ArrayList<>();
    manualChanges.add(ManualChange.from(manualRedactionEntry).withManualRedactionType(addType));

    Change addedChange = new Change(analysisNumber + 1, ChangeType.ADDED, manualRedactionEntry.getRequestDate());

    return RedactionLogEntry.builder()
            .id(id)
            .reason(manualRedactionEntry.getReason())
            .isDictionaryEntry(manualRedactionEntry.isAddToDictionary())
            .isDossierDictionaryEntry(manualRedactionEntry.isAddToDossierDictionary())
            .legalBasis(manualRedactionEntry.getLegalBasis())
            .value(manualRedactionEntry.getValue())
            .sourceId(manualRedactionEntry.getSourceId())
            .section(manualRedactionEntry.getSection())
            .type(manualRedactionEntry.getType())
            .redacted(true)
            .isHint(false)
            .sectionNumber(-1)
            .rectangle(manualRedactionEntry.isRectangle())
            .manualChanges(manualChanges)
            .changes(List.of(addedChange))
            .build();
}
/**
 * Pairs a manual change of any kind with the annotation id it targets, its date and its approval flag.
 * Wrappers order themselves by {@code date}.
 */
@Data
@AllArgsConstructor
private static class ManualRedactionWrapper implements Comparable<ManualRedactionWrapper> {

    private String id;
    private OffsetDateTime date;
    private Object item;
    private boolean approved;


    /**
     * Orders wrappers by their date.
     *
     * @param other the wrapper to compare with
     * @return the result of comparing this wrapper's date with the other wrapper's date
     */
    @Override
    public int compareTo(ManualRedactionWrapper other) {
        return date.compareTo(other.date);
    }
}
}

Some files were not shown because too many files have changed in this diff Show More