RED-7384: improve performance significantly

This commit is contained in:
Kilian Schüttler 2024-04-23 12:51:07 +02:00
parent 93c1a2b90a
commit e6c048d6df
10 changed files with 641 additions and 154 deletions

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.89.3"
val layoutParserVersion = "0.89.11"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.model.document;
import static java.lang.String.format;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
@ -40,7 +41,10 @@ public class DocumentTree {
public TextBlock buildTextBlock() {
return allEntriesInOrder().map(Entry::getNode).filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
return allEntriesInOrder().map(Entry::getNode)
.filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
@ -114,13 +118,78 @@ public class DocumentTree {
public Stream<SemanticNode> childNodes(List<Integer> treeId) {
return getEntryById(treeId).children.stream().map(Entry::getNode);
return getEntryById(treeId).children.stream()
.map(Entry::getNode);
}
/**
 * Finds all direct children of the entry identified by {@code treeId} whose text range
 * intersects the given {@code textRange}. A binary search locates the first child whose
 * range contains {@code textRange.start()}; the scan then walks forward, collecting
 * children until one starts at or beyond {@code textRange.end()}.
 *
 * NOTE(review): if no child contains the start index (e.g. the start falls into a gap
 * between sibling ranges), an empty list is returned even when later children would
 * intersect the range — confirm that sibling text ranges are contiguous.
 *
 * @param treeId the treeId of the Entry whose children shall be checked.
 * @param textRange The TextRange to find intersecting childNodes for.
 * @return A list of all SemanticNodes that are direct children of the specified Entry and whose TextRange intersects the given TextRange
 */
public List<SemanticNode> findIntersectingChildNodes(List<Integer> treeId, TextRange textRange) {
List<Entry> childEntries = getEntryById(treeId).getChildren();
List<SemanticNode> intersectingChildEntries = new LinkedList<>();
// Binary search for the first child whose range contains the start index.
int startIdx = findFirstIdxOfContainingChildBinarySearch(childEntries, textRange.start());
if (startIdx < 0) {
return intersectingChildEntries;
}
// Children are ordered by text range; stop at the first child starting at or past the end.
for (int i = startIdx; i < childEntries.size(); i++) {
if (childEntries.get(i).getNode().getTextRange().start() < textRange.end()) {
intersectingChildEntries.add(childEntries.get(i).getNode());
} else {
break;
}
}
return intersectingChildEntries;
}
/**
 * Looks up the direct child of the entry identified by {@code treeId} whose text range
 * fully contains the given {@code textRange}, if such a child exists.
 *
 * @param treeId the treeId of the Entry whose children shall be checked.
 * @param textRange the TextRange that must be fully contained by the returned child.
 * @return the child node containing the whole range, or {@link Optional#empty()} if none does.
 */
public Optional<SemanticNode> findFirstContainingChild(List<Integer> treeId, TextRange textRange) {
List<Entry> children = getEntryById(treeId).getChildren();
// Locate the child containing the start index; containment of the end decides the result.
int idx = findFirstIdxOfContainingChildBinarySearch(children, textRange.start());
if (idx >= 0) {
SemanticNode candidate = children.get(idx).getNode();
if (candidate.getTextRange().contains(textRange.end())) {
return Optional.of(candidate);
}
}
return Optional.empty();
}
/**
 * Binary search over the text-range-ordered child entries for the entry whose range
 * contains the given start index, i.e. {@code range.start() <= start < range.end()}.
 *
 * @param childNodes children ordered by their nodes' text ranges.
 * @param start the index that the returned child's range must contain.
 * @return the index of the containing child, or -1 if no child's range contains {@code start}.
 */
private int findFirstIdxOfContainingChildBinarySearch(List<Entry> childNodes, int start) {
int lo = 0;
int hi = childNodes.size() - 1;
while (lo <= hi) {
int mid = (lo + hi) >>> 1; // overflow-safe midpoint
TextRange candidate = childNodes.get(mid).getNode().getTextRange();
if (candidate.end() <= start) {
lo = mid + 1; // candidate lies entirely before start
} else if (candidate.start() > start) {
hi = mid - 1; // candidate lies entirely after start
} else {
return mid; // candidate.start() <= start < candidate.end()
}
}
return -1;
}
public Stream<SemanticNode> childNodesOfType(List<Integer> treeId, NodeType nodeType) {
return getEntryById(treeId).children.stream().filter(entry -> entry.node.getType().equals(nodeType)).map(Entry::getNode);
return getEntryById(treeId).children.stream()
.filter(entry -> entry.node.getType().equals(nodeType))
.map(Entry::getNode);
}
@ -199,26 +268,32 @@ public class DocumentTree {
public Stream<Entry> allEntriesInOrder() {
return Stream.of(root).flatMap(DocumentTree::flatten);
return Stream.of(root)
.flatMap(DocumentTree::flatten);
}
public Stream<Entry> allSubEntriesInOrder(List<Integer> parentId) {
return getEntryById(parentId).children.stream().flatMap(DocumentTree::flatten);
return getEntryById(parentId).children.stream()
.flatMap(DocumentTree::flatten);
}
@Override
public String toString() {
return String.join("\n", allEntriesInOrder().map(Entry::toString).toList());
return String.join("\n",
allEntriesInOrder().map(Entry::toString)
.toList());
}
private static Stream<Entry> flatten(Entry entry) {
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentTree::flatten));
return Stream.concat(Stream.of(entry),
entry.children.stream()
.flatMap(DocumentTree::flatten));
}
@ -240,7 +315,7 @@ public class DocumentTree {
List<Integer> treeId;
SemanticNode node;
@Builder.Default
List<Entry> children = new LinkedList<>();
List<Entry> children = new ArrayList<>();
@Override

View File

@ -92,12 +92,18 @@ public class TextRange implements Comparable<TextRange> {
public List<TextRange> split(List<Integer> splitIndices) {
if (splitIndices.stream().anyMatch(idx -> !this.contains(idx))) {
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s", splitIndices.stream().filter(idx -> !this.contains(idx)).toList(), this));
if (splitIndices.stream()
.anyMatch(idx -> !this.contains(idx))) {
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s",
splitIndices.stream()
.filter(idx -> !this.contains(idx))
.toList(),
this));
}
List<TextRange> splitBoundaries = new LinkedList<>();
int previousIndex = start;
for (int splitIndex : splitIndices) {
for (int i = 0, splitIndicesSize = splitIndices.size(); i < splitIndicesSize; i++) {
int splitIndex = splitIndices.get(i);
// skip split if it would produce a boundary of length 0
if (splitIndex == previousIndex) {
@ -113,8 +119,12 @@ public class TextRange implements Comparable<TextRange> {
public static TextRange merge(Collection<TextRange> boundaries) {
int minStart = boundaries.stream().mapToInt(TextRange::start).min().orElseThrow(IllegalArgumentException::new);
int maxEnd = boundaries.stream().mapToInt(TextRange::end).max().orElseThrow(IllegalArgumentException::new);
int minStart = boundaries.stream()
.mapToInt(TextRange::start)
.min().orElseThrow(IllegalArgumentException::new);
int maxEnd = boundaries.stream()
.mapToInt(TextRange::end)
.max().orElseThrow(IllegalArgumentException::new);
return new TextRange(minStart, maxEnd);
}

View File

@ -17,7 +17,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualCha
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -39,6 +38,8 @@ public class Image implements GenericSemanticNode, IEntity {
List<Integer> treeId;
String id;
TextBlock leafTextBlock;
ImageType imageType;
boolean transparent;
Rectangle2D position;
@ -49,14 +50,11 @@ public class Image implements GenericSemanticNode, IEntity {
@Builder.Default
ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
@EqualsAndHashCode.Exclude
Page page;
@EqualsAndHashCode.Exclude
DocumentTree documentTree;
@Builder.Default
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@ -70,9 +68,7 @@ public class Image implements GenericSemanticNode, IEntity {
@Override
public TextBlock getTextBlock() {
return streamAllSubNodes().filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
return leafTextBlock;
}
@ -86,15 +82,21 @@ public class Image implements GenericSemanticNode, IEntity {
@Override
public TextRange getTextRange() {
return GenericSemanticNode.super.getTextRange();
return leafTextBlock.getTextRange();
}
@Override
public int length() {
return getTextRange().length();
}
@Override
public String type() {
return getManualOverwrite().getType()
.orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
return getManualOverwrite().getType().orElse(imageType.toString().toLowerCase(Locale.ENGLISH));
}
@ -126,10 +128,4 @@ public class Image implements GenericSemanticNode, IEntity {
return name.charAt(0) + name.substring(1).toLowerCase(Locale.ENGLISH);
}
public int length() {
return 0;
}
}

View File

@ -70,7 +70,9 @@ public interface SemanticNode {
*/
default Page getFirstPage() {
return getTextBlock().getPages().stream().min(Comparator.comparingInt(Page::getNumber)).orElseThrow();
return getTextBlock().getPages()
.stream()
.min(Comparator.comparingInt(Page::getNumber)).orElseThrow();
}
@ -96,7 +98,8 @@ public interface SemanticNode {
*/
default boolean onPage(int pageNumber) {
return getPages().stream().anyMatch(page -> page.getNumber() == pageNumber);
return getPages().stream()
.anyMatch(page -> page.getNumber() == pageNumber);
}
@ -248,7 +251,9 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfType(String type) {
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> redactionEntity.type().equals(type));
return getEntities().stream()
.filter(TextEntity::active)
.anyMatch(redactionEntity -> redactionEntity.type().equals(type));
}
@ -261,7 +266,10 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAnyType(String... types) {
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.type().equals(type)));
return getEntities().stream()
.filter(TextEntity::active)
.anyMatch(redactionEntity -> Arrays.stream(types)
.anyMatch(type -> redactionEntity.type().equals(type)));
}
@ -274,7 +282,12 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAllTypes(String... types) {
return getEntities().stream().filter(TextEntity::active).map(TextEntity::type).collect(Collectors.toUnmodifiableSet()).containsAll(Arrays.stream(types).toList());
return getEntities().stream()
.filter(TextEntity::active)
.map(TextEntity::type)
.collect(Collectors.toUnmodifiableSet())
.containsAll(Arrays.stream(types)
.toList());
}
@ -287,7 +300,10 @@ public interface SemanticNode {
*/
default List<TextEntity> getEntitiesOfType(String type) {
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.type().equals(type)).toList();
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.type().equals(type))
.toList();
}
@ -300,7 +316,10 @@ public interface SemanticNode {
*/
default List<TextEntity> getEntitiesOfType(List<String> types) {
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.isAnyType(types))
.toList();
}
@ -313,7 +332,11 @@ public interface SemanticNode {
*/
default List<TextEntity> getEntitiesOfType(String... types) {
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types)
.toList()))
.toList();
}
@ -365,7 +388,8 @@ public interface SemanticNode {
*/
default boolean containsAllStrings(String... strings) {
return Arrays.stream(strings).allMatch(this::containsString);
return Arrays.stream(strings)
.allMatch(this::containsString);
}
@ -377,7 +401,8 @@ public interface SemanticNode {
*/
default boolean containsAnyString(String... strings) {
return Arrays.stream(strings).anyMatch(this::containsString);
return Arrays.stream(strings)
.anyMatch(this::containsString);
}
@ -389,7 +414,8 @@ public interface SemanticNode {
*/
default boolean containsAnyString(List<String> strings) {
return strings.stream().anyMatch(this::containsString);
return strings.stream()
.anyMatch(this::containsString);
}
@ -413,7 +439,8 @@ public interface SemanticNode {
*/
default boolean containsAnyStringIgnoreCase(String... strings) {
return Arrays.stream(strings).anyMatch(this::containsStringIgnoreCase);
return Arrays.stream(strings)
.anyMatch(this::containsStringIgnoreCase);
}
@ -425,7 +452,8 @@ public interface SemanticNode {
*/
default boolean containsAllStringsIgnoreCase(String... strings) {
return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase);
return Arrays.stream(strings)
.allMatch(this::containsStringIgnoreCase);
}
@ -437,7 +465,9 @@ public interface SemanticNode {
*/
default boolean containsWord(String word) {
return getTextBlock().getWords().stream().anyMatch(s -> s.equals(word));
return getTextBlock().getWords()
.stream()
.anyMatch(s -> s.equals(word));
}
@ -449,7 +479,10 @@ public interface SemanticNode {
*/
default boolean containsWordIgnoreCase(String word) {
return getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
return getTextBlock().getWords()
.stream()
.map(String::toLowerCase)
.anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
}
@ -461,7 +494,10 @@ public interface SemanticNode {
*/
default boolean containsAnyWord(String... words) {
return Arrays.stream(words).anyMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
return Arrays.stream(words)
.anyMatch(word -> getTextBlock().getWords()
.stream()
.anyMatch(word::equals));
}
@ -473,7 +509,12 @@ public interface SemanticNode {
*/
default boolean containsAnyWordIgnoreCase(String... words) {
return Arrays.stream(words).map(String::toLowerCase).anyMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
return Arrays.stream(words)
.map(String::toLowerCase)
.anyMatch(word -> getTextBlock().getWords()
.stream()
.map(String::toLowerCase)
.anyMatch(word::equals));
}
@ -485,7 +526,10 @@ public interface SemanticNode {
*/
default boolean containsAllWords(String... words) {
return Arrays.stream(words).allMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
return Arrays.stream(words)
.allMatch(word -> getTextBlock().getWords()
.stream()
.anyMatch(word::equals));
}
@ -497,7 +541,12 @@ public interface SemanticNode {
*/
default boolean containsAllWordsIgnoreCase(String... words) {
return Arrays.stream(words).map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
return Arrays.stream(words)
.map(String::toLowerCase)
.allMatch(word -> getTextBlock().getWords()
.stream()
.map(String::toLowerCase)
.anyMatch(word::equals));
}
@ -537,7 +586,11 @@ public interface SemanticNode {
*/
default boolean intersectsRectangle(int x, int y, int w, int h, int pageNumber) {
return getBBox().entrySet().stream().filter(entry -> entry.getKey().getNumber() == pageNumber).map(Map.Entry::getValue).anyMatch(rect -> rect.intersects(x, y, w, h));
return getBBox().entrySet()
.stream()
.filter(entry -> entry.getKey().getNumber() == pageNumber)
.map(Map.Entry::getValue)
.anyMatch(rect -> rect.intersects(x, y, w, h));
}
@ -556,7 +609,7 @@ public interface SemanticNode {
}
textEntity.addIntersectingNode(this);
streamChildren().filter(semanticNode -> semanticNode.getTextRange().intersects(textEntity.getTextRange()))
getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange())
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
}
}
@ -591,7 +644,8 @@ public interface SemanticNode {
*/
default Stream<SemanticNode> streamAllSubNodes() {
return getDocumentTree().allSubEntriesInOrder(getTreeId()).map(DocumentTree.Entry::getNode);
return getDocumentTree().allSubEntriesInOrder(getTreeId())
.map(DocumentTree.Entry::getNode);
}
@ -602,7 +656,9 @@ public interface SemanticNode {
*/
default Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType) {
return getDocumentTree().allSubEntriesInOrder(getTreeId()).filter(entry -> entry.getType().equals(nodeType)).map(DocumentTree.Entry::getNode);
return getDocumentTree().allSubEntriesInOrder(getTreeId())
.filter(entry -> entry.getType().equals(nodeType))
.map(DocumentTree.Entry::getNode);
}
@ -641,7 +697,8 @@ public interface SemanticNode {
if (isLeaf()) {
return getTextBlock().getPositionsPerPage(textRange);
}
Optional<SemanticNode> containingChildNode = streamChildren().filter(child -> child.getTextRange().contains(textRange)).findFirst();
Optional<SemanticNode> containingChildNode = getDocumentTree().findFirstContainingChild(getTreeId(), textRange);
if (containingChildNode.isEmpty()) {
return getTextBlock().getPositionsPerPage(textRange);
}
@ -691,8 +748,12 @@ public interface SemanticNode {
private Map<Page, Rectangle2D> getBBoxFromChildren() {
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox).toList();
Set<Page> pages = childrenBBoxes.stream().flatMap(map -> map.keySet().stream()).collect(Collectors.toSet());
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox)
.toList();
Set<Page> pages = childrenBBoxes.stream()
.flatMap(map -> map.keySet()
.stream())
.collect(Collectors.toSet());
for (Page page : pages) {
Rectangle2D bBoxOnPage = childrenBBoxes.stream()
.filter(childBboxPerPage -> childBboxPerPage.containsKey(page))
@ -710,7 +771,9 @@ public interface SemanticNode {
private Map<Page, Rectangle2D> getBBoxFromLeafTextBlock() {
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks().stream().collect(Collectors.groupingBy(AtomicTextBlock::getPage));
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks()
.stream()
.collect(Collectors.groupingBy(AtomicTextBlock::getPage));
atomicTextBlockPerPage.forEach((page, atomicTextBlocks) -> bBoxPerPage.put(page, RectangleTransformations.atomicTextBlockBBox(atomicTextBlocks)));
return bBoxPerPage;
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
@ -7,22 +8,22 @@ import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
@ -39,7 +40,9 @@ public class DocumentGraphMapper {
DocumentTree documentTree = new DocumentTree(document);
Context context = new Context(documentData, documentTree);
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages())
.map(DocumentGraphMapper::buildPage)
.toList());
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
@ -54,10 +57,12 @@ public class DocumentGraphMapper {
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
List<DocumentTree.Entry> newEntries = new LinkedList<>();
List<DocumentTree.Entry> newEntries = new ArrayList<>(entries.size());
for (DocumentStructure.EntryData entryData : entries) {
List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
.map(pageNumber -> getPage(pageNumber, context))
.toList();
SemanticNode node = switch (entryData.getType()) {
case SECTION -> buildSection(context);
@ -75,7 +80,8 @@ public class DocumentGraphMapper {
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
node.setLeafTextBlock(textBlock);
}
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed().toList();
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
.toList();
node.setTreeId(treeId);
switch (entryData.getType()) {
@ -148,16 +154,18 @@ public class DocumentGraphMapper {
private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) {
return Arrays.stream(atomicTextBlockIds).map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId)).collect(new TextBlockCollector());
return Arrays.stream(atomicTextBlockIds)
.map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId))
.collect(new TextBlockCollector());
}
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
}
@ -171,8 +179,7 @@ public class DocumentGraphMapper {
return context.pageData.stream()
.filter(page -> page.getNumber() == Math.toIntExact(pageIndex))
.findFirst()
.orElseThrow(() -> new NoSuchElementException(String.format("ClassificationPage with number %d not found", pageIndex)));
.findFirst().orElseThrow(() -> new NoSuchElementException(String.format("ClassificationPage with number %d not found", pageIndex)));
}
@ -188,8 +195,10 @@ public class DocumentGraphMapper {
this.documentTree = documentTree;
this.pageData = new LinkedList<>();
this.documentTextData = Arrays.stream(documentData.getDocumentTextData()).toList();
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData()).toList();
this.documentTextData = Arrays.stream(documentData.getDocumentTextData())
.toList();
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData())
.toList();
}

View File

@ -1,14 +1,18 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.*;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.addEntityToNodeEntitySets;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.addToPages;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.allEntitiesIntersectAndHaveSameTypes;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.checkIfBothStartAndEndAreEmpty;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.findIntersectingSubNodes;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.toLineAfterTextRange;
import static com.iqser.red.service.redaction.v1.server.service.document.EntityCreationUtility.truncateEndIfLineBreakIsBetween;
import static com.iqser.red.service.redaction.v1.server.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
@ -276,7 +280,8 @@ public class EntityCreationService {
"this is some text. a here is more text" and "here is more text". We only want to keep the latter.
*/
return entityTextRanges.stream()
.filter(boundary -> entityTextRanges.stream().noneMatch(innerBoundary -> !innerBoundary.equals(boundary) && innerBoundary.containedBy(boundary)))
.filter(boundary -> entityTextRanges.stream()
.noneMatch(innerBoundary -> !innerBoundary.equals(boundary) && innerBoundary.containedBy(boundary)))
.toList();
}
@ -351,10 +356,10 @@ public class EntityCreationService {
return tableNode.streamTableCells()
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findTextRangesByString(string, tableCell.getTextBlock()),
tableCell,
type,
entityType,
tableNode));
tableCell,
type,
entityType,
tableNode));
}
@ -362,10 +367,10 @@ public class EntityCreationService {
return tableNode.streamTableCells()
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, tableCell.getTextBlock()),
tableCell,
type,
entityType,
tableNode));
tableCell,
type,
entityType,
tableNode));
}
@ -500,7 +505,10 @@ public class EntityCreationService {
public Stream<TextEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get);
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
.map(semanticNode -> bySemanticNode(semanticNode, type, entityType))
.filter(Optional::isPresent)
.map(Optional::get);
}
@ -590,14 +598,18 @@ public class EntityCreationService {
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
}
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
if (trimmedTextRange.length() == 0){
if (trimmedTextRange.length() == 0) {
return Optional.empty();
}
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node);
if (node.getEntities().contains(entity)) {
Optional<TextEntity> optionalTextEntity = node.getEntities().stream().filter(e -> e.equals(entity) && e.type().equals(type)).peek(e -> e.addEngines(engines)).findAny();
Optional<TextEntity> optionalTextEntity = node.getEntities()
.stream()
.filter(e -> e.equals(entity) && e.type().equals(type))
.peek(e -> e.addEngines(engines))
.findAny();
if (optionalTextEntity.isEmpty()) {
return optionalTextEntity; // Entity has been recategorized and should not be created at all.
return Optional.empty(); // Entity has been recategorized and should not be created at all.
}
TextEntity existingEntity = optionalTextEntity.get();
if (existingEntity.getTextRange().equals(textRange)) {
@ -609,7 +621,7 @@ public class EntityCreationService {
}
return Optional.empty(); // Entity has been resized, if there are duplicates they should be treated there
}
addEntityToGraph(entity, node);
addEntityToGraph(entity, node.getDocumentTree());
entity.addEngines(engines);
insertToKieSession(entity);
return Optional.of(entity);
@ -638,6 +650,8 @@ public class EntityCreationService {
}
// Do not use anymore: this may not work correctly because duplicate text ranges are not taken into account here.
@Deprecated(forRemoval = true)
public TextEntity mergeEntitiesOfSameType(List<TextEntity> entitiesToMerge, String type, EntityType entityType, SemanticNode node) {
if (!allEntitiesIntersectAndHaveSameTypes(entitiesToMerge)) {
@ -650,29 +664,44 @@ public class EntityCreationService {
return entitiesToMerge.get(0);
}
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream().map(TextEntity::getTextRange).toList()), type, entityType, node);
mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet()));
entitiesToMerge.stream().map(TextEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream()
.map(TextEntity::getTextRange)
.toList()), type, entityType, node);
mergedEntity.addEngines(entitiesToMerge.stream()
.flatMap(entityNode -> entityNode.getEngines()
.stream())
.collect(Collectors.toSet()));
entitiesToMerge.stream()
.map(TextEntity::getMatchedRuleList)
.flatMap(Collection::stream)
.forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
entitiesToMerge.stream()
.map(TextEntity::getManualOverwrite)
.map(ManualChangeOverwrite::getManualChangeLog)
.flatMap(Collection::stream)
.forEach(manualChange -> mergedEntity.getManualOverwrite().addChange(manualChange));
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDossierDictionaryEntry));
mergedEntity.setDictionaryEntry(entitiesToMerge.stream()
.anyMatch(TextEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream()
.anyMatch(TextEntity::isDossierDictionaryEntry));
entityEnrichmentService.enrichEntity(mergedEntity, node.getTextBlock());
addEntityToGraph(mergedEntity, node);
insertToKieSession(mergedEntity);
entitiesToMerge.stream().filter(e -> !e.equals(mergedEntity)).forEach(node.getEntities()::remove);
entitiesToMerge.stream()
.filter(e -> !e.equals(mergedEntity))
.forEach(node.getEntities()::remove);
return mergedEntity;
}
public Stream<TextEntity> copyEntities(List<TextEntity> entities, String type, EntityType entityType, SemanticNode node) {
return entities.stream().map(entity -> copyEntity(entity, type, entityType, node));
return entities.stream()
.map(entity -> copyEntity(entity, type, entityType, node));
}
@ -746,38 +775,19 @@ public class EntityCreationService {
public void addEntityToGraph(TextEntity entity, SemanticNode node) {
DocumentTree documentTree = node.getDocumentTree();
try {
if (node.getEntities().contains(entity)) {
// If entity already exists and it has a different text range, we add the text range to the list of duplicated text ranges
Optional<TextEntity> optionalTextEntity = node.getEntities()
.stream()//
.filter(e -> e.equals(entity))//
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
.findAny();
if (optionalTextEntity.isPresent()) {
addDuplicateEntityToGraph(optionalTextEntity.get(), entity.getTextRange(), node);
} else {
node.getEntities().remove(entity);
addNewEntityToGraph(entity, documentTree);
}
if (node.getEntities().contains(entity)) {
// If entity already exists and it has a different text range, we add the text range to the list of duplicated text ranges
node.getEntities()
.stream()//
.filter(e -> e.equals(entity))//
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
.findAny()
.ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node));
} else {
entity.addIntersectingNode(documentTree.getRoot().getNode());
addEntityToGraph(entity, documentTree);
}
} catch (NoSuchElementException e) {
addNewEntityToGraph(entity, documentTree);
} else {
addEntityToGraph(entity, documentTree);
}
}
/**
 * Registers a brand-new entity in the graph: anchors it at the document root node, enriches it
 * with the root's text block and links it into the page and node entity sets.
 *
 * @param entity the freshly created entity to register
 * @param documentTree the tree whose root the entity is anchored to
 */
private void addNewEntityToGraph(TextEntity entity, DocumentTree documentTree) {
	SemanticNode rootNode = documentTree.getRoot().getNode();
	entity.setDeepestFullyContainingNode(rootNode);
	// Enrichment is based on the text block of the node the entity was just anchored to.
	TextBlock rootTextBlock = entity.getDeepestFullyContainingNode().getTextBlock();
	entityEnrichmentService.enrichEntity(entity, rootTextBlock);
	entity.addIntersectingNode(documentTree.getRoot().getNode());
	addToPages(entity);
	addEntityToNodeEntitySets(entity);
}
@ -788,10 +798,11 @@ public class EntityCreationService {
SemanticNode deepestSharedNode = entityToDuplicate.getIntersectingNodes()
.stream()
.sorted(Comparator.comparingInt(n -> -n.getTreeId().size()))
.filter(intersectingNode -> entityToDuplicate.getDuplicateTextRanges().stream().allMatch(tr -> intersectingNode.getTextRange().contains(tr)) && //
intersectingNode.getTextRange().contains(entityToDuplicate.getTextRange()))
.findFirst()
.orElse(node.getDocumentTree().getRoot().getNode());
.filter(intersectingNode -> entityToDuplicate.getDuplicateTextRanges()
.stream()
.allMatch(tr -> intersectingNode.getTextRange().contains(tr)) && //
intersectingNode.getTextRange().contains(entityToDuplicate.getTextRange()))
.findFirst().orElse(node.getDocumentTree().getRoot().getNode());
entityToDuplicate.setDeepestFullyContainingNode(deepestSharedNode);
@ -802,7 +813,8 @@ public class EntityCreationService {
return;
}
additionalIntersectingNode.getEntities().add(entityToDuplicate);
additionalIntersectingNode.getPages(newTextRange).forEach(page -> page.getEntities().add(entityToDuplicate));
additionalIntersectingNode.getPages(newTextRange)
.forEach(page -> page.getEntities().add(entityToDuplicate));
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
});
}
@ -810,12 +822,7 @@ public class EntityCreationService {
private void addEntityToGraph(TextEntity entity, DocumentTree documentTree) {
SemanticNode containingNode = documentTree.childNodes(Collections.emptyList())
.filter(node -> node.getTextBlock().containsTextRange(entity.getTextRange()))
.findFirst()
.orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
containingNode.addThisToEntityIfIntersects(entity);
documentTree.getRoot().getNode().addThisToEntityIfIntersects(entity);
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
entityEnrichmentService.enrichEntity(entity, textBlock);
@ -824,5 +831,4 @@ public class EntityCreationService {
addEntityToNodeEntitySets(entity);
}
}

View File

@ -0,0 +1,327 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
import java.io.File;
import java.io.FileInputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Import;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Sets;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.dictionarymerge.commons.DictionaryEntryModel;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.tenantcommons.TenantsClient;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
@Disabled
/*
* This test is meant to be used directly with a download from blob storage (e.g. minio). You need to define the dossier template you want to use by supplying an absolute path.
* The dossier template will then be parsed for dictionaries, colors, entities, and rules. This is defined for the all tests once.
* Inside a test you supply a path to your minio download folder. The files should still be zipped in this folder.
* The files will then be checked for completeness and uploaded to the FileSystemBackedStorageService.
* This way you can recreate what is happening on the stack almost exactly.
*/ public class AnalysisEnd2EndTest {
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/business-logic/redactmanager/prod-cp-eu-reg/EFSA_sanitisation_GFL_v1"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";
@Autowired
StorageService storageService;
@Autowired
protected AnalyzeService analyzeService;
@MockBean
DictionaryService dictionaryService;
@MockBean
RabbitTemplate rabbitTemplate;
TestDossierTemplate testDossierTemplate;
@MockBean
protected LegalBasisClient legalBasisClient;
@MockBean
private TenantsClient tenantsClient;
@MockBean
protected RulesClient rulesClient;
@MockBean
protected DictionaryClient dictionaryClient;
@Test
@SneakyThrows
public void runAnalysisEnd2End() {
String folder = "files/end2end/file0"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
ClassPathResource classPathResource = new ClassPathResource(folder);
absoluteFolderPath = classPathResource.getFile().toPath();
} else {
absoluteFolderPath = Path.of(folder);
}
log.info("Starting end2end analyses for all distinct filenames in folder: {}", folder);
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
log.info("Found {} distinct fileIds", analyzeRequests.size());
for (int i = 0; i < analyzeRequests.size(); i++) {
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
analyzeService.analyze(analyzeRequest);
}
}
@BeforeEach
public void setup() {
testDossierTemplate = new TestDossierTemplate(dossierTemplateToUse);
when(dictionaryService.updateDictionary(any(), any())).thenReturn(new DictionaryVersion(0, 0));
when(dictionaryService.getDeepCopyDictionary(any(), any())).thenReturn(testDossierTemplate.testDictionary);
when(dictionaryService.getDictionaryIncrements(any(), any(), any())).thenReturn(new DictionaryIncrement(Collections.emptySet(), new DictionaryVersion(0, 0)));
when(dictionaryService.isHint(any(String.class), any())).thenAnswer(invocation -> {
String type = invocation.getArgument(0);
return testDossierTemplate.testDictionary.getType(type).isHint();
});
when(dictionaryService.getColor(any(String.class), any())).thenAnswer(invocation -> {
String type = invocation.getArgument(0);
return testDossierTemplate.testDictionary.getType(type).getColor();
});
when(dictionaryService.getNotRedactedColor(any())).thenReturn(new float[]{0.2f, 0.2f, 0.2f});
when(rulesClient.getVersion(testDossierTemplate.id, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
when(rulesClient.getRules(testDossierTemplate.id, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(testDossierTemplate.rules));
when(rulesClient.getVersion(testDossierTemplate.id, RuleFileType.COMPONENT)).thenReturn(testDossierTemplate.componentRules != null ? System.currentTimeMillis() : -1);
when(rulesClient.getRules(testDossierTemplate.id, RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(testDossierTemplate.componentRules));
}
@SneakyThrows
private List<AnalyzeRequest> prepareStorageForFolder(Path folder) {
return Files.list(folder)
.map(this::parseFileId)
.distinct()
.map(fileId -> prepareStorageForFile(fileId, folder))
.toList();
}
private String parseFileId(Path path) {
return path.getFileName().toString().split("\\.")[0];
}
@SneakyThrows
private AnalyzeRequest prepareStorageForFile(String fileId, Path folder) {
AnalyzeRequest request = new AnalyzeRequest();
request.setDossierId(UUID.randomUUID().toString());
request.setFileId(UUID.randomUUID().toString());
request.setDossierTemplateId(testDossierTemplate.id);
request.setManualRedactions(new ManualRedactions());
request.setAnalysisNumber(-1);
Set<FileType> endingsToUpload = Set.of("ORIGIN",
"DOCUMENT_PAGES",
"DOCUMENT_POSITION",
"DOCUMENT_STRUCTURE",
"DOCUMENT_TEXT",
"IMAGE_INFO",
"NER_ENTITIES",
"TABLES",
"IMPORTED_REDACTIONS")
.stream()
.map(FileType::valueOf)
.collect(Collectors.toSet());
Set<FileType> uploadedFileTypes = Files.walk(folder)
.filter(path -> path.toFile().isFile())
.filter(path -> endingsToUpload.contains(parseFileTypeFromPath(path)))
.map(filePath -> uploadFile(filePath, request))
.collect(Collectors.toUnmodifiableSet());
Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
if (!missingFileTypes.isEmpty()) {
log.error("Folder {} is missing files of type {}",
folder.toFile(),
missingFileTypes.stream()
.map(Enum::toString)
.collect(Collectors.joining(", ")));
throw new NotFoundException("Not all required file types are present.");
}
return request;
}
private static FileType parseFileTypeFromPath(Path path) {
return FileType.valueOf(path.getFileName().toString().split("\\.")[1]);
}
@SneakyThrows
private FileType uploadFile(Path path, AnalyzeRequest request) {
FileType fileType = parseFileTypeFromPath(path);
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) {
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType), in);
}
return fileType;
}
private class TestDossierTemplate {
String id;
Dictionary testDictionary;
AtomicInteger dictEntryIdCounter = new AtomicInteger(0);
String rules;
String componentRules;
@SneakyThrows
TestDossierTemplate(Path dossierTemplateToUse) {
Map<String, Object> dossierTemplate = mapper.readValue(dossierTemplateToUse.resolve("dossierTemplate.json").toFile(), HashMap.class);
this.id = (String) dossierTemplate.get("dossierTemplateId");
List<DictionaryModel> dictionaries = Files.walk(dossierTemplateToUse)
.filter(path -> path.getFileName().toString().equals("dossierType.json"))
.map(this::loadDictionaryModel)
.toList();
File ruleFile = dossierTemplateToUse.resolve("rules.drl").toFile();
rules = new String(Files.readAllBytes(ruleFile.toPath()));
File componentRuleFile = dossierTemplateToUse.resolve("componentRules.drl").toFile();
if (componentRuleFile.exists()) {
componentRules = new String(Files.readAllBytes(componentRuleFile.toPath()));
}
testDictionary = new Dictionary(dictionaries, new DictionaryVersion(0, 0));
}
@SneakyThrows
private DictionaryModel loadDictionaryModel(Path path) {
Map<String, Object> model = mapper.readValue(path.toFile(), HashMap.class);
Set<DictionaryEntryModel> entries = new HashSet<>();
Set<DictionaryEntryModel> falsePositives = new HashSet<>();
Set<DictionaryEntryModel> falseRecommendations = new HashSet<>();
String type = (String) model.get("type");
Integer rank = (Integer) model.get("rank");
float[] color = hexToFloatArr((String) model.get("hexColor"));
Boolean caseInsensitive = (Boolean) model.get("caseInsensitive");
Boolean hint = (Boolean) model.get("hint");
Boolean hasDictionary = (Boolean) model.get("hasDictionary");
boolean isDossierDictionary;
if (model.containsKey("dossierDictionaryOnly")) {
isDossierDictionary = true;
} else {
isDossierDictionary = ((String) model.get("id")).split(":").length == 3;
}
if (hasDictionary) {
try (var in = new FileInputStream(path.getParent().resolve("entries.txt").toFile())) {
entries.addAll(parseDictionaryEntryModelFromFile(new String(in.readAllBytes()), dictEntryIdCounter, (String) model.get("typeId")));
}
try (var in = new FileInputStream(path.getParent().resolve("falsePositives.txt").toFile())) {
falsePositives.addAll(parseDictionaryEntryModelFromFile(new String(in.readAllBytes()), dictEntryIdCounter, (String) model.get("typeId")));
}
try (var in = new FileInputStream(path.getParent().resolve("falseRecommendations.txt").toFile())) {
falseRecommendations.addAll(parseDictionaryEntryModelFromFile(new String(in.readAllBytes()), dictEntryIdCounter, (String) model.get("typeId")));
}
}
return new DictionaryModel(type, rank, color, caseInsensitive, hint, entries, falsePositives, falseRecommendations, isDossierDictionary);
}
private Set<DictionaryEntryModel> parseDictionaryEntryModelFromFile(String s, AtomicInteger dictEntryIdCounter, String typeId) {
String[] values = s.split("\n");
return Arrays.stream(values)
.map(value -> new DictionaryEntryModel(dictEntryIdCounter.getAndIncrement(), value, 0L, false, typeId))
.collect(Collectors.toUnmodifiableSet());
}
private float[] hexToFloatArr(String hexColor) {
// Remove # symbol if present
String cleanHexColor = hexColor.replace("#", "");
// Parse hex string into RGB components
int r = Integer.parseInt(cleanHexColor.substring(0, 2), 16);
int g = Integer.parseInt(cleanHexColor.substring(2, 4), 16);
int b = Integer.parseInt(cleanHexColor.substring(4, 6), 16);
// Normalize RGB values to floats between 0 and 1
float[] rgbFloat = new float[3];
rgbFloat[0] = r / 255.0f;
rgbFloat[1] = g / 255.0f;
rgbFloat[2] = b / 255.0f;
return rgbFloat;
}
}
}

View File

@ -1,16 +0,0 @@
<!-- Log4j2 configuration (removed in this commit in favor of logback):
     plain console logging, root level WARN, com.iqser packages at INFO. -->
<Configuration>
<Appenders>
<Console name="CONSOLE" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="warn">
<AppenderRef ref="CONSOLE"/>
</Root>
<!-- Project packages log at INFO even though the root is WARN. -->
<Logger name="com.iqser" level="info"/>
</Loggers>
</Configuration>

View File

@ -0,0 +1,17 @@
<!-- Logback configuration: the root appender is chosen via the Spring property
     "logging.type" (expected values: CONSOLE for human-readable output, JSON for
     Logstash-encoded output). application name and version are exposed as context
     properties for the JSON encoder. -->
<configuration>
<!-- NOTE(review): valid springProperty scopes are "local", "context" and "system";
     "configuration" looks unintended and will likely fall back to local scope - verify. -->
<springProperty scope="configuration" name="logType" source="logging.type"/>
<springProperty scope="context" name="application.name" source="spring.application.name"/>
<springProperty scope="context" name="version" source="project.version"/>
<!-- Spring Boot defaults define the CONSOLE appender referenced below. -->
<include resource="org/springframework/boot/logging/logback/defaults.xml"/>
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
<appender name="JSON" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="net.logstash.logback.encoder.LogstashEncoder"/>
</appender>
<root level="INFO">
<appender-ref ref="${logType}"/>
</root>
</configuration>