reformat #303

Merged
kilian.schuettler1 merged 1 commit from reformat into master 2024-03-01 15:49:37 +01:00
127 changed files with 2363 additions and 1225 deletions

View File

@ -15,4 +15,5 @@ public class AnalyzeResponse {
private String fileId;
private List<UnprocessedManualEntity> unprocessedManualEntities;
}

View File

@ -17,4 +17,5 @@ public class DroolsSyntaxDeprecatedWarnings {
Integer line;
Integer column;
String message;
}

View File

@ -17,4 +17,5 @@ public class DroolsSyntaxErrorMessage {
Integer line;
Integer column;
String message;
}

View File

@ -28,6 +28,7 @@ public class DroolsSyntaxValidation {
getDroolsSyntaxErrorMessages().add(DroolsSyntaxErrorMessage.builder().line(line).column(column).message(message).build());
}
public boolean isCompiled() {
return droolsSyntaxErrorMessages.isEmpty();

View File

@ -24,4 +24,5 @@ public class UnprocessedManualEntity {
private String section;
@Builder.Default
private List<Position> positions = new ArrayList<>();
}

View File

@ -35,11 +35,14 @@ public class Application {
SpringApplication.run(Application.class, args);
}
@Bean
public ObservedAspect observedAspect(ObservationRegistry observationRegistry) {
return new ObservedAspect(observationRegistry);
}
@Bean
public TimedAspect timedAspect(MeterRegistry registry) {

View File

@ -95,6 +95,7 @@ public class DeprecatedElementsFinder {
return this.deprecatedClasses;
}
private String getMethodSignature(Method method) {
String methodName = method.getName();

View File

@ -16,7 +16,7 @@ public class RedisCachingConfiguration {
public RedisCacheManagerBuilderCustomizer redisCacheManagerBuilderCustomizer() {
return (builder) -> builder.withCacheConfiguration("documentDataCache",
RedisCacheConfiguration.defaultCacheConfig().entryTtl(Duration.ofMinutes(30)).disableCachingNullValues());
RedisCacheConfiguration.defaultCacheConfig().entryTtl(Duration.ofMinutes(30)).disableCachingNullValues());
}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;

View File

@ -13,7 +13,6 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class RuleBuilderController implements RuleBuilderResource {
@Override
public RuleBuilderModel getRuleBuilderModel() {

View File

@ -92,11 +92,16 @@ public class LegacyRedactionLogMergeService {
return redactionLog;
}
public long getNumberOfAffectedAnnotations(ManualRedactions manualRedactions) {
return createManualRedactionWrappers(manualRedactions).stream().map(ManualRedactionWrapper::getId).distinct().count();
return createManualRedactionWrappers(manualRedactions).stream()
.map(ManualRedactionWrapper::getId)
.distinct()
.count();
}
private List<ManualRedactionWrapper> createManualRedactionWrappers(ManualRedactions manualRedactions) {
List<ManualRedactionWrapper> manualRedactionWrappers = new ArrayList<>();

View File

@ -21,7 +21,9 @@ public class LegacyVersion0MigrationService {
public RedactionLog mergeDuplicateAnnotationIds(RedactionLog redactionLog) {
List<RedactionLogEntry> mergedEntries = new LinkedList<>();
Map<String, List<RedactionLogEntry>> entriesById = redactionLog.getRedactionLogEntry().stream().collect(Collectors.groupingBy(RedactionLogEntry::getId));
Map<String, List<RedactionLogEntry>> entriesById = redactionLog.getRedactionLogEntry()
.stream()
.collect(Collectors.groupingBy(RedactionLogEntry::getId));
for (List<RedactionLogEntry> entries : entriesById.values()) {
if (entries.isEmpty()) {
@ -33,7 +35,10 @@ public class LegacyVersion0MigrationService {
continue;
}
List<RedactionLogEntry> sortedEntries = entries.stream().sorted(Comparator.comparing(entry -> entry.getChanges().get(0).getDateTime())).toList();
List<RedactionLogEntry> sortedEntries = entries.stream()
.sorted(Comparator.comparing(entry -> entry.getChanges()
.get(0).getDateTime()))
.toList();
RedactionLogEntry initialEntry = sortedEntries.get(0);
for (RedactionLogEntry entry : sortedEntries.subList(1, sortedEntries.size())) {

View File

@ -14,4 +14,5 @@ public record KieWrapper(KieContainer container, long rulesVersion) {
return container != null && rulesVersion >= 0;
}
}

View File

@ -19,4 +19,5 @@ public class MigratedEntityLog {
MigratedIds migratedIds;
EntityLog entityLog;
}

View File

@ -27,13 +27,15 @@ public class NerEntities {
public boolean hasEntitiesOfType(String type) {
return nerEntityList.stream().anyMatch(nerEntity -> nerEntity.type.equals(type));
return nerEntityList.stream()
.anyMatch(nerEntity -> nerEntity.type.equals(type));
}
public Stream<NerEntity> streamEntitiesOfType(String type) {
return nerEntityList.stream().filter(nerEntity -> nerEntity.type().equals(type));
return nerEntityList.stream()
.filter(nerEntity -> nerEntity.type().equals(type));
}

View File

@ -88,7 +88,8 @@ public class Entity {
.textAfter(e.getTextAfter())
.startOffset(e.getStartOffset())
.endOffset(e.getEndOffset())
.length(Optional.ofNullable(e.getValue()).orElse("").length())
.length(Optional.ofNullable(e.getValue())
.orElse("").length())
.imageHasTransparency(e.isImageHasTransparency())
.isDictionaryEntry(e.isDictionaryEntry())
.isDossierDictionaryEntry(e.isDossierDictionaryEntry())

View File

@ -53,7 +53,8 @@ public class Dictionary {
public boolean hasLocalEntries() {
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntriesWithMatchedRules().isEmpty());
return dictionaryModels.stream()
.anyMatch(dm -> !dm.getLocalEntriesWithMatchedRules().isEmpty());
}
@ -116,12 +117,18 @@ public class Dictionary {
}
localAccessMap.get(type)
.getLocalEntriesWithMatchedRules()
.merge(cleanedValue.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
.merge(cleanedValue.trim(),
matchedRulesSet,
(set1, set2) -> Stream.concat(set1.stream(), set2.stream())
.collect(Collectors.toSet()));
if (alsoAddLastname) {
String lastname = cleanedValue.split(" ")[0];
localAccessMap.get(type)
.getLocalEntriesWithMatchedRules()
.merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
.merge(lastname,
matchedRulesSet,
(set1, set2) -> Stream.concat(set1.stream(), set2.stream())
.collect(Collectors.toSet()));
}
}
@ -153,7 +160,10 @@ public class Dictionary {
} else {
splitAuthorNames = Arrays.asList(textEntity.getValueWithLineBreaks().split("\n"));
}
return splitAuthorNames.stream().map(String::trim).filter(authorName -> Patterns.AUTHOR_NAME_PATTERN.matcher(authorName).matches()).toList();
return splitAuthorNames.stream()
.map(String::trim)
.filter(authorName -> Patterns.AUTHOR_NAME_PATTERN.matcher(authorName).matches())
.toList();
}
}

View File

@ -57,14 +57,18 @@ public class DictionaryModel implements Serializable {
this.falsePositives = falsePositives;
this.falseRecommendations = falseRecommendations;
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntryModel::getValue).collect(Collectors.toList()),
caseInsensitive);
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntryModel::getValue).collect(Collectors.toList()),
caseInsensitive);
this.entriesSearch = new SearchImplementation(this.entries.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntryModel::getValue)
.collect(Collectors.toList()), caseInsensitive);
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntryModel::getValue)
.collect(Collectors.toList()), caseInsensitive);
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
@ -81,8 +85,10 @@ public class DictionaryModel implements Serializable {
public SearchImplementation getEntriesSearch() {
if (entriesSearch == null) {
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
caseInsensitive);
this.entriesSearch = new SearchImplementation(this.entries.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
return entriesSearch;
}
@ -92,9 +98,9 @@ public class DictionaryModel implements Serializable {
if (falsePositiveSearch == null) {
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
return falsePositiveSearch;
}
@ -104,14 +110,16 @@ public class DictionaryModel implements Serializable {
if (falseRecommendationsSearch == null) {
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
return falseRecommendationsSearch;
}
public Set<MatchedRule> getMatchedRulesForLocalDictionaryEntry(String value) {
var cleanedValue = isCaseInsensitive() ? value.toLowerCase(Locale.US) : value;
return localEntriesWithMatchedRules.get(cleanedValue);

View File

@ -76,7 +76,9 @@ public class SearchImplementation {
if (ignoreCase) {
textToCheck = textToCheck.toLowerCase(Locale.ROOT);
}
return this.pattern.matcher(textToCheck).results().findAny().isPresent();
return this.pattern.matcher(textToCheck).results()
.findAny()
.isPresent();
} else {
return this.trie.containsMatch(textToCheck);
}
@ -89,9 +91,14 @@ public class SearchImplementation {
return new ArrayList<>();
}
if (this.pattern != null) {
return this.pattern.matcher(text).results().map(r -> new TextRange(r.start(), r.end())).collect(Collectors.toList());
return this.pattern.matcher(text).results()
.map(r -> new TextRange(r.start(), r.end()))
.collect(Collectors.toList());
} else {
return this.trie.parseText(text).stream().map(r -> new TextRange(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
return this.trie.parseText(text)
.stream()
.map(r -> new TextRange(r.getStart(), r.getEnd() + 1))
.collect(Collectors.toList());
}
}
@ -103,9 +110,14 @@ public class SearchImplementation {
}
CharSequence subSequence = text.subSequence(region.start(), region.end());
if (this.pattern != null) {
return this.pattern.matcher(subSequence).results().map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList());
return this.pattern.matcher(subSequence).results()
.map(r -> new TextRange(r.start() + region.start(), r.end() + region.start()))
.collect(Collectors.toList());
} else {
return this.trie.parseText(subSequence).stream().map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList());
return this.trie.parseText(subSequence)
.stream()
.map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1))
.collect(Collectors.toList());
}
}
@ -120,9 +132,14 @@ public class SearchImplementation {
if (ignoreCase) {
textToCheck = textToCheck.toLowerCase(Locale.ROOT);
}
return this.pattern.matcher(textToCheck).results().map(r -> new MatchPosition(r.start(), r.end())).collect(Collectors.toList());
return this.pattern.matcher(textToCheck).results()
.map(r -> new MatchPosition(r.start(), r.end()))
.collect(Collectors.toList());
} else {
return this.trie.parseText(textToCheck).stream().map(r -> new MatchPosition(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
return this.trie.parseText(textToCheck)
.stream()
.map(r -> new MatchPosition(r.getStart(), r.getEnd() + 1))
.collect(Collectors.toList());
}
}

View File

@ -40,7 +40,10 @@ public class DocumentTree {
public TextBlock buildTextBlock() {
return allEntriesInOrder().map(Entry::getNode).filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
return allEntriesInOrder().map(Entry::getNode)
.filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
@ -114,13 +117,16 @@ public class DocumentTree {
public Stream<SemanticNode> childNodes(List<Integer> treeId) {
return getEntryById(treeId).children.stream().map(Entry::getNode);
return getEntryById(treeId).children.stream()
.map(Entry::getNode);
}
public Stream<SemanticNode> childNodesOfType(List<Integer> treeId, NodeType nodeType) {
return getEntryById(treeId).children.stream().filter(entry -> entry.node.getType().equals(nodeType)).map(Entry::getNode);
return getEntryById(treeId).children.stream()
.filter(entry -> entry.node.getType().equals(nodeType))
.map(Entry::getNode);
}
@ -199,26 +205,32 @@ public class DocumentTree {
public Stream<Entry> allEntriesInOrder() {
return Stream.of(root).flatMap(DocumentTree::flatten);
return Stream.of(root)
.flatMap(DocumentTree::flatten);
}
public Stream<Entry> allSubEntriesInOrder(List<Integer> parentId) {
return getEntryById(parentId).children.stream().flatMap(DocumentTree::flatten);
return getEntryById(parentId).children.stream()
.flatMap(DocumentTree::flatten);
}
@Override
public String toString() {
return String.join("\n", allEntriesInOrder().map(Entry::toString).toList());
return String.join("\n",
allEntriesInOrder().map(Entry::toString)
.toList());
}
private static Stream<Entry> flatten(Entry entry) {
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentTree::flatten));
return Stream.concat(Stream.of(entry),
entry.children.stream()
.flatMap(DocumentTree::flatten));
}

View File

@ -92,8 +92,13 @@ public class TextRange implements Comparable<TextRange> {
public List<TextRange> split(List<Integer> splitIndices) {
if (splitIndices.stream().anyMatch(idx -> !this.contains(idx))) {
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s", splitIndices.stream().filter(idx -> !this.contains(idx)).toList(), this));
if (splitIndices.stream()
.anyMatch(idx -> !this.contains(idx))) {
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s",
splitIndices.stream()
.filter(idx -> !this.contains(idx))
.toList(),
this));
}
List<TextRange> splitBoundaries = new LinkedList<>();
int previousIndex = start;
@ -113,8 +118,14 @@ public class TextRange implements Comparable<TextRange> {
public static TextRange merge(Collection<TextRange> boundaries) {
int minStart = boundaries.stream().mapToInt(TextRange::start).min().orElseThrow(IllegalArgumentException::new);
int maxEnd = boundaries.stream().mapToInt(TextRange::end).max().orElseThrow(IllegalArgumentException::new);
int minStart = boundaries.stream()
.mapToInt(TextRange::start)
.min()
.orElseThrow(IllegalArgumentException::new);
int maxEnd = boundaries.stream()
.mapToInt(TextRange::end)
.max()
.orElseThrow(IllegalArgumentException::new);
return new TextRange(minStart, maxEnd);
}

View File

@ -35,14 +35,16 @@ public interface IEntity {
default String value() {
return getManualOverwrite().getValue().orElse(getValue() == null ? "" : getValue());
return getManualOverwrite().getValue()
.orElse(getValue() == null ? "" : getValue());
}
// Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
default boolean applied() {
return getManualOverwrite().getApplied().orElse(getMatchedRule().isApplied());
return getManualOverwrite().getApplied()
.orElse(getMatchedRule().isApplied());
}
@ -54,19 +56,22 @@ public interface IEntity {
default boolean ignored() {
return getManualOverwrite().getIgnored().orElse(getMatchedRule().isIgnored());
return getManualOverwrite().getIgnored()
.orElse(getMatchedRule().isIgnored());
}
default boolean removed() {
return getManualOverwrite().getRemoved().orElse(getMatchedRule().isRemoved());
return getManualOverwrite().getRemoved()
.orElse(getMatchedRule().isRemoved());
}
default boolean resized() {
return getManualOverwrite().getResized().orElse(false);
return getManualOverwrite().getResized()
.orElse(false);
}
@ -133,12 +138,12 @@ public interface IEntity {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
getMatchedRuleList().add(MatchedRule.builder()
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
.reason(reason)
.legalBasis(legalBasis)
.applied(true)
.writeValueWithLineBreaks(true)
.build());
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
.reason(reason)
.legalBasis(legalBasis)
.applied(true)
.writeValueWithLineBreaks(true)
.build());
}
@ -148,12 +153,12 @@ public interface IEntity {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
getMatchedRuleList().add(MatchedRule.builder()
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
.reason(reason)
.legalBasis(legalBasis)
.applied(true)
.references(new HashSet<>(references))
.build());
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
.reason(reason)
.legalBasis(legalBasis)
.applied(true)
.references(new HashSet<>(references))
.build());
}
@ -207,7 +212,8 @@ public interface IEntity {
default String legalBasis() {
return getManualOverwrite().getLegalBasis().orElse(getMatchedRule().getLegalBasis());
return getManualOverwrite().getLegalBasis()
.orElse(getMatchedRule().getLegalBasis());
}
}

View File

@ -52,7 +52,8 @@ public final class MatchedRule implements Comparable<MatchedRule> {
if (!this.isApplied()) {
return this;
}
return MatchedRule.builder().ruleIdentifier(getRuleIdentifier())
return MatchedRule.builder()
.ruleIdentifier(getRuleIdentifier())
.writeValueWithLineBreaks(this.isWriteValueWithLineBreaks())
.legalBasis(this.getLegalBasis())
.reason(this.getReason())
@ -97,7 +98,19 @@ public final class MatchedRule implements Comparable<MatchedRule> {
@Override
public String toString() {
return "MatchedRule[ruleIdentifier=" + ruleIdentifier + ", reason=" + reason + ", legalBasis=" + legalBasis + ", applied=" + applied + ", writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", references=" + references + ']';
return "MatchedRule[ruleIdentifier="
+ ruleIdentifier
+ ", reason="
+ reason
+ ", legalBasis="
+ legalBasis
+ ", applied="
+ applied
+ ", writeValueWithLineBreaks="
+ writeValueWithLineBreaks
+ ", references="
+ references
+ ']';
}
}

View File

@ -67,7 +67,13 @@ public class TextEntity implements IEntity {
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
return TextEntity.builder().id(buildId(node, textRange, type, entityType)).type(type).entityType(entityType).textRange(textRange).manualOverwrite(new ManualChangeOverwrite(entityType)).build();
return TextEntity.builder()
.id(buildId(node, textRange, type, entityType))
.type(type)
.entityType(entityType)
.textRange(textRange)
.manualOverwrite(new ManualChangeOverwrite(entityType))
.build();
}
@ -80,7 +86,13 @@ public class TextEntity implements IEntity {
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(), rectanglesPerLinePerPage.values().stream().flatMap(Collection::stream).toList(), type, entityType.name());
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
rectanglesPerLinePerPage.values()
.stream()
.flatMap(Collection::stream)
.toList(),
type,
entityType.name());
}
@ -89,15 +101,18 @@ public class TextEntity implements IEntity {
duplicateTextRanges.add(textRange);
}
public boolean occursInNodeOfType(Class<? extends SemanticNode> clazz) {
return intersectingNodes.stream().anyMatch(clazz::isInstance);
return intersectingNodes.stream()
.anyMatch(clazz::isInstance);
}
public boolean occursInNode(SemanticNode semanticNode) {
return intersectingNodes.stream().anyMatch(node -> node.equals(semanticNode));
return intersectingNodes.stream()
.anyMatch(node -> node.equals(semanticNode));
}
@ -146,7 +161,10 @@ public class TextEntity implements IEntity {
.min(Comparator.comparingInt(Page::getNumber))
.orElseThrow(() -> new RuntimeException("No Positions found on any page!"));
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildPositionOnPage(firstPage, id, entry)).toList();
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet()
.stream()
.map(entry -> buildPositionOnPage(firstPage, id, entry))
.toList();
}
return positionsOnPagePerPage;
}
@ -194,7 +212,8 @@ public class TextEntity implements IEntity {
public boolean matchesAnnotationId(String manualRedactionId) {
return getPositionsOnPagePerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
return getPositionsOnPagePerPage().stream()
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
}
@ -224,14 +243,16 @@ public class TextEntity implements IEntity {
@Override
public String type() {
return getManualOverwrite().getType().orElse(type);
return getManualOverwrite().getType()
.orElse(type);
}
@Override
public String value() {
return getManualOverwrite().getValue().orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
return getManualOverwrite().getValue()
.orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
}
}

View File

@ -65,13 +65,15 @@ public class Document implements GenericSemanticNode {
public List<Section> getMainSections() {
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node).collect(Collectors.toList());
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
.collect(Collectors.toList());
}
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock);
return streamAllNodes().filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock);
}
@ -92,13 +94,16 @@ public class Document implements GenericSemanticNode {
@Override
public Headline getHeadline() {
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseGet(Headline::empty);
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node)
.findFirst()
.orElseGet(Headline::empty);
}
private Stream<SemanticNode> streamAllNodes() {
return documentTree.allEntriesInOrder().map(DocumentTree.Entry::getNode);
return documentTree.allEntriesInOrder()
.map(DocumentTree.Entry::getNode);
}

View File

@ -106,7 +106,9 @@ public class Headline implements GenericSemanticNode {
public boolean hasParagraphs() {
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH)
.findFirst()
.isPresent();
}
}

View File

@ -136,7 +136,7 @@ public class Image implements GenericSemanticNode, IEntity {
Map<Page, Rectangle2D> bboxImage = image.getBBox();
Map<Page, Rectangle2D> bbox = this.getBBox();
//image needs to be on the same page
if(bboxImage.get(this.page) != null) {
if (bboxImage.get(this.page) != null) {
Rectangle2D intersection = bboxImage.get(this.page).createIntersection(bbox.get(this.page));
double calculatedIntersection = intersection.getWidth() * intersection.getHeight();
double area = bbox.get(this.page).getWidth() * bbox.get(this.page).getHeight();

View File

@ -45,7 +45,10 @@ public class Page {
public TextBlock getMainBodyTextBlock() {
return mainBody.stream().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
return mainBody.stream()
.filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
@ -54,4 +57,5 @@ public class Page {
return String.valueOf(number);
}
}

View File

@ -53,7 +53,8 @@ public class Section implements GenericSemanticNode {
public boolean hasTables() {
return streamAllSubNodesOfType(NodeType.TABLE).findAny().isPresent();
return streamAllSubNodesOfType(NodeType.TABLE).findAny()
.isPresent();
}
@ -68,7 +69,9 @@ public class Section implements GenericSemanticNode {
public TextBlock getTextBlock() {
if (textBlock == null) {
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
return textBlock;
}

View File

@ -72,7 +72,11 @@ public class SectionIdentifier {
}
identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
}
return new SectionIdentifier(Format.NUMERICAL, identifierString, identifiers.stream().toList(), false);
return new SectionIdentifier(Format.NUMERICAL,
identifierString,
identifiers.stream()
.toList(),
false);
}

View File

@ -71,7 +71,10 @@ public interface SemanticNode {
*/
default Page getFirstPage() {
return getTextBlock().getPages().stream().min(Comparator.comparingInt(Page::getNumber)).orElseThrow();
return getTextBlock().getPages()
.stream()
.min(Comparator.comparingInt(Page::getNumber))
.orElseThrow();
}
@ -97,7 +100,8 @@ public interface SemanticNode {
*/
default boolean onPage(int pageNumber) {
return getPages().stream().anyMatch(page -> page.getNumber() == pageNumber);
return getPages().stream()
.anyMatch(page -> page.getNumber() == pageNumber);
}
@ -249,7 +253,9 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfType(String type) {
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> redactionEntity.type().equals(type));
return getEntities().stream()
.filter(TextEntity::active)
.anyMatch(redactionEntity -> redactionEntity.type().equals(type));
}
@ -262,7 +268,10 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAnyType(String... types) {
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.type().equals(type)));
return getEntities().stream()
.filter(TextEntity::active)
.anyMatch(redactionEntity -> Arrays.stream(types)
.anyMatch(type -> redactionEntity.type().equals(type)));
}
@ -275,7 +284,12 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAllTypes(String... types) {
return getEntities().stream().filter(TextEntity::active).map(TextEntity::type).collect(Collectors.toUnmodifiableSet()).containsAll(Arrays.stream(types).toList());
return getEntities().stream()
.filter(TextEntity::active)
.map(TextEntity::type)
.collect(Collectors.toUnmodifiableSet())
.containsAll(Arrays.stream(types)
.toList());
}
@ -288,7 +302,10 @@ public interface SemanticNode {
*/
default List<TextEntity> getEntitiesOfType(String type) {
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.type().equals(type)).toList();
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.type().equals(type))
.toList();
}
@ -301,7 +318,10 @@ public interface SemanticNode {
*/
default List<TextEntity> getEntitiesOfType(List<String> types) {
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.isAnyType(types))
.toList();
}
@ -314,7 +334,11 @@ public interface SemanticNode {
*/
default List<TextEntity> getEntitiesOfType(String... types) {
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types)
.toList()))
.toList();
}
@ -328,7 +352,8 @@ public interface SemanticNode {
TextBlock textBlock = getTextBlock();
if (!textBlock.getAtomicTextBlocks().isEmpty()) {
return getTextBlock().getAtomicTextBlocks().get(0).getNumberOnPage();
return getTextBlock().getAtomicTextBlocks()
.get(0).getNumberOnPage();
} else {
return -1;
}
@ -357,14 +382,16 @@ public interface SemanticNode {
return getTextBlock().getSearchText().contains(string);
}
Set<LayoutEngine> getEngines();
default void addEngine(LayoutEngine engine) {
getEngines().add(engine);
}
/**
* Checks whether this SemanticNode contains all the provided Strings.
*
@ -373,7 +400,8 @@ public interface SemanticNode {
*/
default boolean containsAllStrings(String... strings) {
return Arrays.stream(strings).allMatch(this::containsString);
return Arrays.stream(strings)
.allMatch(this::containsString);
}
@ -385,7 +413,8 @@ public interface SemanticNode {
*/
default boolean containsAnyString(String... strings) {
return Arrays.stream(strings).anyMatch(this::containsString);
return Arrays.stream(strings)
.anyMatch(this::containsString);
}
@ -397,7 +426,8 @@ public interface SemanticNode {
*/
default boolean containsAnyString(List<String> strings) {
return strings.stream().anyMatch(this::containsString);
return strings.stream()
.anyMatch(this::containsString);
}
@ -421,7 +451,8 @@ public interface SemanticNode {
*/
default boolean containsAnyStringIgnoreCase(String... strings) {
return Arrays.stream(strings).anyMatch(this::containsStringIgnoreCase);
return Arrays.stream(strings)
.anyMatch(this::containsStringIgnoreCase);
}
@ -433,7 +464,8 @@ public interface SemanticNode {
*/
default boolean containsAllStringsIgnoreCase(String... strings) {
return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase);
return Arrays.stream(strings)
.allMatch(this::containsStringIgnoreCase);
}
@ -445,7 +477,9 @@ public interface SemanticNode {
*/
default boolean containsWord(String word) {
return getTextBlock().getWords().stream().anyMatch(s -> s.equals(word));
return getTextBlock().getWords()
.stream()
.anyMatch(s -> s.equals(word));
}
@ -457,7 +491,10 @@ public interface SemanticNode {
*/
default boolean containsWordIgnoreCase(String word) {
return getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
return getTextBlock().getWords()
.stream()
.map(String::toLowerCase)
.anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
}
@ -469,7 +506,10 @@ public interface SemanticNode {
*/
default boolean containsAnyWord(String... words) {
return Arrays.stream(words).anyMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
return Arrays.stream(words)
.anyMatch(word -> getTextBlock().getWords()
.stream()
.anyMatch(word::equals));
}
@ -481,7 +521,12 @@ public interface SemanticNode {
*/
default boolean containsAnyWordIgnoreCase(String... words) {
return Arrays.stream(words).map(String::toLowerCase).anyMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
return Arrays.stream(words)
.map(String::toLowerCase)
.anyMatch(word -> getTextBlock().getWords()
.stream()
.map(String::toLowerCase)
.anyMatch(word::equals));
}
@ -493,7 +538,10 @@ public interface SemanticNode {
*/
default boolean containsAllWords(String... words) {
return Arrays.stream(words).allMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
return Arrays.stream(words)
.allMatch(word -> getTextBlock().getWords()
.stream()
.anyMatch(word::equals));
}
@ -505,7 +553,12 @@ public interface SemanticNode {
*/
default boolean containsAllWordsIgnoreCase(String... words) {
return Arrays.stream(words).map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
return Arrays.stream(words)
.map(String::toLowerCase)
.allMatch(word -> getTextBlock().getWords()
.stream()
.map(String::toLowerCase)
.anyMatch(word::equals));
}
@ -545,7 +598,11 @@ public interface SemanticNode {
*/
default boolean intersectsRectangle(int x, int y, int w, int h, int pageNumber) {
return getBBox().entrySet().stream().filter(entry -> entry.getKey().getNumber() == pageNumber).map(Map.Entry::getValue).anyMatch(rect -> rect.intersects(x, y, w, h));
return getBBox().entrySet()
.stream()
.filter(entry -> entry.getKey().getNumber() == pageNumber)
.map(Map.Entry::getValue)
.anyMatch(rect -> rect.intersects(x, y, w, h));
}
@ -598,7 +655,8 @@ public interface SemanticNode {
*/
default Stream<SemanticNode> streamAllSubNodes() {
return getDocumentTree().allSubEntriesInOrder(getTreeId()).map(DocumentTree.Entry::getNode);
return getDocumentTree().allSubEntriesInOrder(getTreeId())
.map(DocumentTree.Entry::getNode);
}
@ -609,7 +667,9 @@ public interface SemanticNode {
*/
default Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType) {
return getDocumentTree().allSubEntriesInOrder(getTreeId()).filter(entry -> entry.getType().equals(nodeType)).map(DocumentTree.Entry::getNode);
return getDocumentTree().allSubEntriesInOrder(getTreeId())
.filter(entry -> entry.getType().equals(nodeType))
.map(DocumentTree.Entry::getNode);
}
@ -648,7 +708,8 @@ public interface SemanticNode {
if (isLeaf()) {
return getTextBlock().getPositionsPerPage(textRange);
}
Optional<SemanticNode> containingChildNode = streamChildren().filter(child -> child.getTextRange().contains(textRange)).findFirst();
Optional<SemanticNode> containingChildNode = streamChildren().filter(child -> child.getTextRange().contains(textRange))
.findFirst();
if (containingChildNode.isEmpty()) {
return getTextBlock().getPositionsPerPage(textRange);
}
@ -698,8 +759,12 @@ public interface SemanticNode {
private Map<Page, Rectangle2D> getBBoxFromChildren() {
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox).toList();
Set<Page> pages = childrenBBoxes.stream().flatMap(map -> map.keySet().stream()).collect(Collectors.toSet());
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox)
.toList();
Set<Page> pages = childrenBBoxes.stream()
.flatMap(map -> map.keySet()
.stream())
.collect(Collectors.toSet());
for (Page page : pages) {
Rectangle2D bBoxOnPage = childrenBBoxes.stream()
.filter(childBboxPerPage -> childBboxPerPage.containsKey(page))
@ -717,7 +782,9 @@ public interface SemanticNode {
private Map<Page, Rectangle2D> getBBoxFromLeafTextBlock() {
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks().stream().collect(Collectors.groupingBy(AtomicTextBlock::getPage));
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks()
.stream()
.collect(Collectors.groupingBy(AtomicTextBlock::getPage));
atomicTextBlockPerPage.forEach((page, atomicTextBlocks) -> bBoxPerPage.put(page, RectangleTransformations.atomicTextBlockBBox(atomicTextBlocks)));
return bBoxPerPage;
}

View File

@ -79,7 +79,9 @@ public class TableCell implements GenericSemanticNode {
}
if (textBlock == null) {
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
return textBlock;
}

View File

@ -61,6 +61,7 @@ public class AtomicTextBlock implements TextBlock {
return lineBreaks.size() + 1;
}
public static AtomicTextBlock empty(Long textBlockIdx, int stringOffset, Page page, int numberOnPage, SemanticNode parent) {
return AtomicTextBlock.builder()
@ -77,10 +78,7 @@ public class AtomicTextBlock implements TextBlock {
}
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData,
DocumentPositionData atomicPositionBlockData,
SemanticNode parent,
Page page) {
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData, DocumentPositionData atomicPositionBlockData, SemanticNode parent, Page page) {
return AtomicTextBlock.builder()
.id(atomicTextBlockData.getId())
@ -88,8 +86,10 @@ public class AtomicTextBlock implements TextBlock {
.page(page)
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
.searchText(atomicTextBlockData.getSearchText())
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed().toList())
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed().toList())
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed()
.toList())
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed()
.toList())
.positions(toRectangle2DList(atomicPositionBlockData.getPositions()))
.parent(parent)
.build();
@ -98,7 +98,9 @@ public class AtomicTextBlock implements TextBlock {
private static List<Rectangle2D> toRectangle2DList(float[][] positions) {
return Arrays.stream(positions).map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3])).toList();
return Arrays.stream(positions)
.map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3]))
.toList();
}
@ -118,6 +120,7 @@ public class AtomicTextBlock implements TextBlock {
return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
}
public List<String> getWords() {
if (words == null) {
@ -144,9 +147,9 @@ public class AtomicTextBlock implements TextBlock {
public int getNextLinebreak(int fromIndex) {
return lineBreaks.stream()//
.filter(linebreak -> linebreak > fromIndex - textRange.start()) //
.findFirst() //
.orElse(searchText.length()) + textRange.start();
.filter(linebreak -> linebreak > fromIndex - textRange.start()) //
.findFirst() //
.orElse(searchText.length()) + textRange.start();
}
@ -154,9 +157,9 @@ public class AtomicTextBlock implements TextBlock {
public int getPreviousLinebreak(int fromIndex) {
return lineBreaks.stream()//
.filter(linebreak -> linebreak <= fromIndex - textRange.start())//
.reduce((a, b) -> b)//
.orElse(0) + textRange.start();
.filter(linebreak -> linebreak <= fromIndex - textRange.start())//
.reduce((a, b) -> b)//
.orElse(0) + textRange.start();
}
@ -209,7 +212,10 @@ public class AtomicTextBlock implements TextBlock {
return "";
}
Set<Integer> lbInBoundary = lineBreaks.stream().map(i -> i + textRange.start()).filter(textRange::contains).collect(Collectors.toSet());
Set<Integer> lbInBoundary = lineBreaks.stream()
.map(i -> i + textRange.start())
.filter(textRange::contains)
.collect(Collectors.toSet());
if (textRange.end() == getTextRange().end()) {
lbInBoundary.add(getTextRange().end());
}
@ -235,7 +241,10 @@ public class AtomicTextBlock implements TextBlock {
private List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
return getLineBreaks().stream().map(linebreak -> linebreak + this.textRange.start()).filter(textRange::contains).toList();
return getLineBreaks().stream()
.map(linebreak -> linebreak + this.textRange.start())
.filter(textRange::contains)
.toList();
}

View File

@ -44,7 +44,8 @@ public class ConcatenatedTextBlock implements TextBlock {
this.atomicTextBlocks.add(firstTextBlock);
textRange = new TextRange(firstTextBlock.getTextRange().start(), firstTextBlock.getTextRange().end());
atomicTextBlocks.subList(1, atomicTextBlocks.size()).forEach(this::concat);
atomicTextBlocks.subList(1, atomicTextBlocks.size())
.forEach(this::concat);
}
@ -65,7 +66,10 @@ public class ConcatenatedTextBlock implements TextBlock {
private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) {
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getTextRange().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
return atomicTextBlocks.stream()
.filter(textBlock -> textBlock.getTextRange().contains(stringIdx))
.findAny()
.orElseThrow(IndexOutOfBoundsException::new);
}
@ -99,14 +103,18 @@ public class ConcatenatedTextBlock implements TextBlock {
@Override
public List<String> getWords() {
return atomicTextBlocks.stream().map(AtomicTextBlock::getWords).flatMap(Collection::stream).toList();
return atomicTextBlocks.stream()
.map(AtomicTextBlock::getWords)
.flatMap(Collection::stream)
.toList();
}
@Override
public int numberOfLines() {
return atomicTextBlocks.stream().mapToInt(AtomicTextBlock::numberOfLines).sum();
return atomicTextBlocks.stream()
.mapToInt(AtomicTextBlock::numberOfLines).sum();
}
@ -127,7 +135,10 @@ public class ConcatenatedTextBlock implements TextBlock {
@Override
public List<Integer> getLineBreaks() {
return getAtomicTextBlocks().stream().flatMap(atomicTextBlock -> atomicTextBlock.getLineBreaks().stream()).toList();
return getAtomicTextBlocks().stream()
.flatMap(atomicTextBlock -> atomicTextBlock.getLineBreaks()
.stream())
.toList();
}
@ -202,7 +213,8 @@ public class ConcatenatedTextBlock implements TextBlock {
AtomicTextBlock lastTextBlock = textBlocks.get(textBlocks.size() - 1);
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage,
lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(),
stringTextRange.end())));
return rectanglesPerLinePerPage;
}
@ -239,7 +251,10 @@ public class ConcatenatedTextBlock implements TextBlock {
private Map<Page, List<Rectangle2D>> mergeEntityPositionsWithSamePageNode(Map<Page, List<Rectangle2D>> map1, Map<Page, List<Rectangle2D>> map2) {
Map<Page, List<Rectangle2D>> mergedMap = new HashMap<>(map1);
map2.forEach((pageNode, rectangles) -> mergedMap.merge(pageNode, rectangles, (l1, l2) -> Stream.concat(l1.stream(), l2.stream()).toList()));
map2.forEach((pageNode, rectangles) -> mergedMap.merge(pageNode,
rectangles,
(l1, l2) -> Stream.concat(l1.stream(), l2.stream())
.toList()));
return mergedMap;
}

View File

@ -18,8 +18,10 @@ public interface TextBlock extends CharSequence {
String getSearchText();
List<String> getWords();
List<AtomicTextBlock> getAtomicTextBlocks();
@ -35,7 +37,6 @@ public interface TextBlock extends CharSequence {
TextRange getLineTextRange(int lineNumber);
List<Integer> getLineBreaks();
@ -71,6 +72,7 @@ public interface TextBlock extends CharSequence {
return RectangleTransformations.rectangle2DBBox(getLinePositions(lineNumber));
}
default String searchTextWithLineBreaks() {
return subSequenceWithLineBreaks(getTextRange());
@ -85,7 +87,9 @@ public interface TextBlock extends CharSequence {
default Set<Page> getPages() {
return getAtomicTextBlocks().stream().map(AtomicTextBlock::getPage).collect(Collectors.toUnmodifiableSet());
return getAtomicTextBlocks().stream()
.map(AtomicTextBlock::getPage)
.collect(Collectors.toUnmodifiableSet());
}

View File

@ -9,7 +9,8 @@ public record RuleClass(RuleType ruleType, List<RuleUnit> ruleUnits) {
public Optional<RuleUnit> findRuleUnitByInteger(Integer unit) {
return ruleUnits.stream()
.filter(ruleUnit -> Objects.equals(ruleUnit.unit(), unit)).findFirst();
.filter(ruleUnit -> Objects.equals(ruleUnit.unit(), unit))
.findFirst();
}
}

View File

@ -37,13 +37,17 @@ public final class RuleFileBluePrint {
public Optional<RuleClass> findRuleClassByType(RuleType ruleType) {
return ruleClasses.stream().filter(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType)).findFirst();
return ruleClasses.stream()
.filter(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType))
.findFirst();
}
public Set<String> getImportSplitByKeyword() {
return Arrays.stream(imports.replaceAll("\n", "").split("import")).map(String::trim).collect(Collectors.toSet());
return Arrays.stream(imports.replaceAll("\n", "").split("import"))
.map(String::trim)
.collect(Collectors.toSet());
}
@ -53,11 +57,15 @@ public final class RuleFileBluePrint {
return findRuleClassByType(ruleIdentifier.type()).map(RuleClass::ruleUnits)
.orElse(Collections.emptyList())
.stream()
.flatMap(ruleUnit -> ruleUnit.rules().stream().filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
.flatMap(ruleUnit -> ruleUnit.rules()
.stream()
.filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
.toList();
}
return findRuleClassByType(ruleIdentifier.type()).flatMap(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit()))
.map(ruleUnit -> ruleUnit.rules().stream().filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
.map(ruleUnit -> ruleUnit.rules()
.stream()
.filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
.orElse(Stream.empty())
.toList();
}
@ -65,13 +73,18 @@ public final class RuleFileBluePrint {
public List<RuleIdentifier> getAllRuleIdentifiers() {
return streamAllRules().map(BasicRule::getIdentifier).collect(Collectors.toList());
return streamAllRules().map(BasicRule::getIdentifier)
.collect(Collectors.toList());
}
public Stream<BasicRule> streamAllRules() {
return getRuleClasses().stream().map(RuleClass::ruleUnits).flatMap(Collection::stream).map(RuleUnit::rules).flatMap(Collection::stream);
return getRuleClasses().stream()
.map(RuleClass::ruleUnits)
.flatMap(Collection::stream)
.map(RuleUnit::rules)
.flatMap(Collection::stream);
}

View File

@ -42,8 +42,8 @@ public record RuleIdentifier(@NonNull RuleType type, Integer unit, Integer id) {
public boolean matches(RuleIdentifier ruleIdentifier) {
return ruleIdentifier.type().equals(this.type()) && //
(Objects.isNull(ruleIdentifier.unit()) || Objects.isNull(this.unit()) || Objects.equals(this.unit(), ruleIdentifier.unit())) && //
(Objects.isNull(ruleIdentifier.id()) || Objects.isNull(this.id()) || Objects.equals(this.id(), ruleIdentifier.id()));
(Objects.isNull(ruleIdentifier.unit()) || Objects.isNull(this.unit()) || Objects.equals(this.unit(), ruleIdentifier.unit())) && //
(Objects.isNull(ruleIdentifier.id()) || Objects.isNull(this.id()) || Objects.equals(this.id(), ruleIdentifier.id()));
}

View File

@ -70,6 +70,7 @@ public class MessagingConfiguration {
.build();
}
@Bean
public Queue redactionAnalysisResponseQueue() {

View File

@ -51,14 +51,14 @@ public class RedactionMessageReceiver {
// This prevents from endless retries oom errors.
if (message.getMessageProperties().isRedelivered()) {
var errorMessage = format("Error during last processing of request with dossierId: %s and fileId: %s, do not retry.",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId());
analyzeRequest.getDossierId(),
analyzeRequest.getFileId());
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
new FileErrorInfo(errorMessage,
priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE,
"redaction-service",
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
analyzeRequest.getFileId(),
new FileErrorInfo(errorMessage,
priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE,
"redaction-service",
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
throw new AmqpRejectAndDontRequeueException(errorMessage);
}
@ -84,9 +84,9 @@ public class RedactionMessageReceiver {
log.debug(analyzeRequest.getManualRedactions().toString());
result = analyzeService.analyze(analyzeRequest);
log.info("Successfully analyzed dossier {} file {} took: {} s",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
format("%.2f", result.getDuration() / 1000.0));
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
format("%.2f", result.getDuration() / 1000.0));
log.info("----------------------------------------------------------------------------------");
break;
@ -96,9 +96,9 @@ public class RedactionMessageReceiver {
log.debug(analyzeRequest.getManualRedactions().toString());
result = analyzeService.reanalyze(analyzeRequest);
log.info("Successfully reanalyzed dossier {} file {} took: {} s",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
format("%.2f", result.getDuration() / 1000.0));
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
format("%.2f", result.getDuration() / 1000.0));
log.info("----------------------------------------------------------------------------------");
break;
case SURROUNDING_TEXT_ANALYSIS:
@ -106,9 +106,7 @@ public class RedactionMessageReceiver {
log.info("Starting Surrounding Text Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.debug(analyzeRequest.getManualRedactions().toString());
unprocessedChangesService.analyseSurroundingText(analyzeRequest);
log.info("Successful Surrounding Text Analysis dossier {} file {} ",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId());
log.info("Successful Surrounding Text Analysis dossier {} file {} ", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("-------------------------------------------------------------------------------------------------");
shouldRespond = false;
break;
@ -137,8 +135,8 @@ public class RedactionMessageReceiver {
log.warn("Failed to process analyze request: {}", analyzeRequest, e);
var timestamp = OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
new FileErrorInfo(e.getMessage(), priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE, "redaction-service", timestamp));
analyzeRequest.getFileId(),
new FileErrorInfo(e.getMessage(), priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE, "redaction-service", timestamp));
}
@ -153,8 +151,8 @@ public class RedactionMessageReceiver {
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
log.info("Failed to process analyze request, errorCause: {}, timestamp: {}", errorCause, timestamp);
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
new FileErrorInfo(errorCause, REDACTION_DQL, "redaction-service", timestamp));
analyzeRequest.getFileId(),
new FileErrorInfo(errorCause, REDACTION_DQL, "redaction-service", timestamp));
}
}

View File

@ -23,13 +23,15 @@ public class ComponentLogCreatorService {
public ComponentLog buildComponentLog(int analysisNumber, List<Component> components, long componentRulesVersion) {
Map<String, List<ComponentLogEntryValue>> map = new HashMap<>();
components.stream().sorted(ComponentComparator.first()).forEach(component -> {
ComponentLogEntryValue componentLogEntryValue = buildComponentLogEntry(component);
map.computeIfAbsent(component.getName(), k -> new ArrayList<>()).add(componentLogEntryValue);
});
List<ComponentLogEntry> componentLogComponents = map
.entrySet()
.stream().map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue()))
components.stream()
.sorted(ComponentComparator.first())
.forEach(component -> {
ComponentLogEntryValue componentLogEntryValue = buildComponentLogEntry(component);
map.computeIfAbsent(component.getName(), k -> new ArrayList<>()).add(componentLogEntryValue);
});
List<ComponentLogEntry> componentLogComponents = map.entrySet()
.stream()
.map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue()))
.toList();
return new ComponentLog(analysisNumber, componentRulesVersion, componentLogComponents);
}
@ -38,24 +40,36 @@ public class ComponentLogCreatorService {
private ComponentLogEntryValue buildComponentLogEntry(Component component) {
return ComponentLogEntryValue.builder()
.value(component.getValue()).originalValue(component.getValue())
.value(component.getValue())
.originalValue(component.getValue())
.componentRuleId(component.getMatchedRule().toString())
.valueDescription(component.getValueDescription())
.componentLogEntityReferences(toComponentEntityReferences(component.getReferences().stream().sorted(EntityComparators.first()).toList()))
.componentLogEntityReferences(toComponentEntityReferences(component.getReferences()
.stream()
.sorted(EntityComparators.first())
.toList()))
.build();
}
private List<ComponentLogEntityReference> toComponentEntityReferences(List<Entity> references) {
return references.stream().map(this::toComponentEntityReference).toList();
return references.stream()
.map(this::toComponentEntityReference)
.toList();
}
private ComponentLogEntityReference toComponentEntityReference(Entity entity) {
return ComponentLogEntityReference.builder().id(entity.getId())
.page(entity.getPositions().stream().findFirst().map(Position::getPageNumber).orElse(0)).entityRuleId(entity.getMatchedRule())
return ComponentLogEntityReference.builder()
.id(entity.getId())
.page(entity.getPositions()
.stream()
.findFirst()
.map(Position::getPageNumber)
.orElse(0))
.entityRuleId(entity.getMatchedRule())
.type(entity.getType())
.build();
}

View File

@ -56,10 +56,11 @@ public class DictionarySearchService {
searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream()
.filter(boundary -> entityCreationService.isValidEntityTextRange(node.getTextBlock(), boundary))
.forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, Set.of(Engine.DICTIONARY)).ifPresent(entity -> {
entity.setDictionaryEntry(true);
entity.setDossierDictionaryEntry(isDossierDictionaryEntry);
}));
.forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, Set.of(Engine.DICTIONARY))
.ifPresent(entity -> {
entity.setDictionaryEntry(true);
entity.setDossierDictionaryEntry(isDossierDictionaryEntry);
}));
}
}

View File

@ -106,41 +106,47 @@ public class DictionaryService {
List<DictionaryModel> dictionaryModels = getDossierTemplateDictionary(dossierTemplateId).getDictionary();
dictionaryModels.forEach(dictionaryModel -> {
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalsePositives().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalseRecommendations().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getEntries()
.forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalsePositives()
.forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalseRecommendations()
.forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
});
if (dossierDictionaryExists(dossierId)) {
dictionaryModels = getDossierDictionary(dossierId).getDictionary();
dictionaryModels.forEach(dictionaryModel -> {
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalsePositives().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalseRecommendations().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getEntries()
.forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalsePositives()
.forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
dictionaryModel.getFalseRecommendations()
.forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
});
}
@ -155,84 +161,120 @@ public class DictionaryService {
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId, true) : dictionaryClient.getAllTypesForDossier(dossierId,
true);
true);
if (CollectionUtils.isNotEmpty(typeResponse)) {
List<DictionaryModel> dictionary = typeResponse.stream().map(t -> {
List<DictionaryModel> dictionary = typeResponse.stream()
.map(t -> {
Optional<DictionaryModel> optionalOldModel;
if (dossierId == null) {
var representation = getDossierTemplateDictionary(dossierTemplateId);
optionalOldModel = representation != null ? representation.getDictionary()
.stream()
.filter(f -> f.getType().equals(t.getType()))
.findAny() : Optional.empty();
} else {
var representation = getDossierDictionary(dossierId);
optionalOldModel = representation != null ? representation.getDictionary()
.stream()
.filter(f -> f.getType().equals(t.getType()))
.findAny() : Optional.empty();
}
Optional<DictionaryModel> optionalOldModel;
if (dossierId == null) {
var representation = getDossierTemplateDictionary(dossierTemplateId);
optionalOldModel = representation != null ? representation.getDictionary()
.stream()
.filter(f -> f.getType().equals(t.getType()))
.findAny() : Optional.empty();
} else {
var representation = getDossierDictionary(dossierId);
optionalOldModel = representation != null ? representation.getDictionary()
.stream()
.filter(f -> f.getType().equals(t.getType()))
.findAny() : Optional.empty();
}
Set<DictionaryEntryModel> entries = new HashSet<>();
Set<DictionaryEntryModel> falsePositives = new HashSet<>();
Set<DictionaryEntryModel> falseRecommendations = new HashSet<>();
Set<DictionaryEntryModel> entries = new HashSet<>();
Set<DictionaryEntryModel> falsePositives = new HashSet<>();
Set<DictionaryEntryModel> falseRecommendations = new HashSet<>();
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
var newValues = newEntries.getEntries().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
optionalOldModel.ifPresent(oldDictionaryModel -> {
});
if (optionalOldModel.isPresent()) {
var oldModel = optionalOldModel.get();
if (oldModel.isCaseInsensitive() && !t.isCaseInsensitive()) {
// add old entries from existing DictionaryModel but exclude lower case representation
entries.addAll(oldModel.getEntries().stream().filter(f -> !newValues.stream().map(s -> s.toLowerCase(Locale.ROOT)).toList().contains(f.getValue())).toList());
falsePositives.addAll(oldModel.getFalsePositives()
var newValues = newEntries.getEntries()
.stream()
.filter(f -> !newFalsePositivesValues.stream().map(s -> s.toLowerCase(Locale.ROOT)).toList().contains(f.getValue()))
.toList());
falseRecommendations.addAll(oldModel.getFalseRecommendations()
.map(DictionaryEntry::getValue)
.collect(Collectors.toSet());
var newFalsePositivesValues = newEntries.getFalsePositives()
.stream()
.filter(f -> !newFalseRecommendationsValues.stream().map(s -> s.toLowerCase(Locale.ROOT)).toList().contains(f.getValue()))
.toList());
} else if (!oldModel.isCaseInsensitive() && t.isCaseInsensitive()) {
// add old entries from existing DictionaryModel but exclude upper case representation
entries.addAll(oldModel.getEntries().stream().filter(f -> !newValues.contains(f.getValue().toLowerCase(Locale.ROOT))).toList());
falsePositives.addAll(oldModel.getFalsePositives().stream().filter(f -> !newFalsePositivesValues.contains(f.getValue().toLowerCase(Locale.ROOT))).toList());
falseRecommendations.addAll(oldModel.getFalseRecommendations()
.map(DictionaryEntry::getValue)
.collect(Collectors.toSet());
var newFalseRecommendationsValues = newEntries.getFalseRecommendations()
.stream()
.filter(f -> !newFalseRecommendationsValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
.toList());
.map(DictionaryEntry::getValue)
.collect(Collectors.toSet());
} else {
// add old entries from existing DictionaryModel
entries.addAll(oldModel.getEntries().stream().filter(f -> !newValues.contains(f.getValue())).toList());
falsePositives.addAll(oldModel.getFalsePositives().stream().filter(f -> !newFalsePositivesValues.contains(f.getValue())).toList());
falseRecommendations.addAll(oldModel.getFalseRecommendations().stream().filter(f -> !newFalseRecommendationsValues.contains(f.getValue())).toList());
}
}
optionalOldModel.ifPresent(oldDictionaryModel -> {
// Add Increments
entries.addAll(newEntries.getEntries());
falsePositives.addAll(newEntries.getFalsePositives());
falseRecommendations.addAll(newEntries.getFalseRecommendations());
});
if (optionalOldModel.isPresent()) {
var oldModel = optionalOldModel.get();
if (oldModel.isCaseInsensitive() && !t.isCaseInsensitive()) {
// add old entries from existing DictionaryModel but exclude lower case representation
entries.addAll(oldModel.getEntries()
.stream()
.filter(f -> !newValues.stream()
.map(s -> s.toLowerCase(Locale.ROOT))
.toList().contains(f.getValue()))
.toList());
falsePositives.addAll(oldModel.getFalsePositives()
.stream()
.filter(f -> !newFalsePositivesValues.stream()
.map(s -> s.toLowerCase(Locale.ROOT))
.toList().contains(f.getValue()))
.toList());
falseRecommendations.addAll(oldModel.getFalseRecommendations()
.stream()
.filter(f -> !newFalseRecommendationsValues.stream()
.map(s -> s.toLowerCase(Locale.ROOT))
.toList().contains(f.getValue()))
.toList());
} else if (!oldModel.isCaseInsensitive() && t.isCaseInsensitive()) {
// add old entries from existing DictionaryModel but exclude upper case representation
entries.addAll(oldModel.getEntries()
.stream()
.filter(f -> !newValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
.toList());
falsePositives.addAll(oldModel.getFalsePositives()
.stream()
.filter(f -> !newFalsePositivesValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
.toList());
falseRecommendations.addAll(oldModel.getFalseRecommendations()
.stream()
.filter(f -> !newFalseRecommendationsValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
.toList());
return new DictionaryModel(t.getType(),
t.getRank(),
convertColor(t.getHexColor()),
t.isCaseInsensitive(),
t.isHint(),
entries,
falsePositives,
falseRecommendations,
dossierId != null);
}).sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList());
} else {
// add old entries from existing DictionaryModel
entries.addAll(oldModel.getEntries()
.stream()
.filter(f -> !newValues.contains(f.getValue()))
.toList());
falsePositives.addAll(oldModel.getFalsePositives()
.stream()
.filter(f -> !newFalsePositivesValues.contains(f.getValue()))
.toList());
falseRecommendations.addAll(oldModel.getFalseRecommendations()
.stream()
.filter(f -> !newFalseRecommendationsValues.contains(f.getValue()))
.toList());
}
}
// Add Increments
entries.addAll(newEntries.getEntries());
falsePositives.addAll(newEntries.getFalsePositives());
falseRecommendations.addAll(newEntries.getFalseRecommendations());
return new DictionaryModel(t.getType(),
t.getRank(),
convertColor(t.getHexColor()),
t.isCaseInsensitive(),
t.isHint(),
entries,
falsePositives,
falseRecommendations,
dossierId != null);
})
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
@ -264,17 +306,17 @@ public class DictionaryService {
var type = dictionaryClient.getDictionaryForType(typeId, fromVersion);
Set<DictionaryEntryModel> entries = type.getEntries() != null ? new HashSet<>(type.getEntries()
.stream()
.map(DictionaryEntryModel::new)
.collect(Collectors.toSet())) : new HashSet<>();
.stream()
.map(DictionaryEntryModel::new)
.collect(Collectors.toSet())) : new HashSet<>();
Set<DictionaryEntryModel> falsePositives = type.getFalsePositiveEntries() != null ? new HashSet<>(type.getFalsePositiveEntries()
.stream()
.map(DictionaryEntryModel::new)
.collect(Collectors.toSet())) : new HashSet<>();
.stream()
.map(DictionaryEntryModel::new)
.collect(Collectors.toSet())) : new HashSet<>();
Set<DictionaryEntryModel> falseRecommendations = type.getFalseRecommendationEntries() != null ? new HashSet<>(type.getFalseRecommendationEntries()
.stream()
.map(DictionaryEntryModel::new)
.collect(Collectors.toSet())) : new HashSet<>();
.stream()
.map(DictionaryEntryModel::new)
.collect(Collectors.toSet())) : new HashSet<>();
if (type.isCaseInsensitive()) {
entries.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
@ -282,10 +324,10 @@ public class DictionaryService {
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
}
log.debug("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
entries.size(),
falsePositives.size(),
falseRecommendations.size(),
typeId);
entries.size(),
falsePositives.size(),
falseRecommendations.size(),
typeId);
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
}
@ -300,7 +342,8 @@ public class DictionaryService {
@SneakyThrows
public float[] getColor(String type, String dossierTemplateId) {
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap().get(type);
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap()
.get(type);
if (model != null) {
return model.getColor();
}
@ -311,7 +354,8 @@ public class DictionaryService {
@SneakyThrows
public boolean isHint(String type, String dossierTemplateId) {
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap().get(type);
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap()
.get(type);
if (model != null) {
return model.isHint();
}
@ -335,15 +379,20 @@ public class DictionaryService {
var dossierRepresentation = getDossierDictionary(dossierId);
var dossierDictionaries = dossierRepresentation.getDictionary();
mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries),
convertDictionaryModel(dossierDictionaries)));
convertDictionaryModel(dossierDictionaries)));
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
} else {
mergedDictionaries = new ArrayList<>();
dossierTemplateDictionaries.forEach(dm -> mergedDictionaries.add(SerializationUtils.clone(dm)));
}
return new Dictionary(mergedDictionaries.stream().sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList()),
DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
return new Dictionary(mergedDictionaries.stream()
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList()),
DictionaryVersion.builder()
.dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion())
.dossierVersion(dossierDictionaryVersion)
.build());
}
@ -371,14 +420,16 @@ public class DictionaryService {
@SneakyThrows
private DictionaryRepresentation getDossierTemplateDictionary(String dossierTemplateId) {
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossierTemplate().get(dossierTemplateId);
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossierTemplate()
.get(dossierTemplateId);
}
@SneakyThrows
private DictionaryRepresentation getDossierDictionary(String dossierId) {
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossier().get(dossierId);
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossier()
.get(dossierId);
}
@ -421,14 +472,14 @@ public class DictionaryService {
return commonsDictionaries.stream()
.map(cd -> new DictionaryModel(cd.getType(),
cd.getRank(),
cd.getColor(),
cd.isCaseInsensitive(),
cd.isHint(),
cd.getEntries(),
cd.getFalsePositives(),
cd.getFalseRecommendations(),
cd.isDossierDictionary()))
cd.getRank(),
cd.getColor(),
cd.isCaseInsensitive(),
cd.isHint(),
cd.getEntries(),
cd.getFalsePositives(),
cd.getFalseRecommendations(),
cd.isDossierDictionary()))
.collect(Collectors.toList());
}

View File

@ -30,7 +30,7 @@ import lombok.extern.slf4j.Slf4j;
public class EntityChangeLogService {
@Timed("redactmanager_computeChanges")
public boolean computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, ManualRedactions manualRedactions, int analysisNumber) {
public boolean computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber) {
var now = OffsetDateTime.now();
if (previousEntityLogEntries.isEmpty()) {
@ -58,16 +58,12 @@ public class EntityChangeLogService {
entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now));
}
}
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, manualRedactions, analysisNumber, now);
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now);
return hasChanges;
}
private void addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries,
List<EntityLogEntry> newEntityLogEntries,
ManualRedactions manualRedactions,
int analysisNumber,
OffsetDateTime now) {
private void addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber, OffsetDateTime now) {
Set<String> existingIds = newEntityLogEntries.stream()
.map(EntityLogEntry::getId)

View File

@ -70,7 +70,7 @@ public class EntityLogCreatorService {
List<EntityLogEntry> previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getManualRedactions(), analyzeRequest.getAnalysisNumber());
entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber());
return new EntityLog(redactionServiceSettings.getAnalysisVersion(),
analyzeRequest.getAnalysisNumber(),
@ -128,10 +128,8 @@ public class EntityLogCreatorService {
.collect(Collectors.toList());
previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections);
boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections,
newEntityLogEntries,
analyzeRequest.getManualRedactions(),
analyzeRequest.getAnalysisNumber());
boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, newEntityLogEntries, analyzeRequest.getAnalysisNumber());
previousEntityLog.getEntityLogEntry().addAll(newEntityLogEntries);
return updateVersionsAndReturnChanges(previousEntityLog, dictionaryVersion, analyzeRequest, hasChanges);

View File

@ -74,9 +74,9 @@ public class ManualChangesApplicationService {
.orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!"));
positionOnPageToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions()
.stream()
.map(ManualChangesApplicationService::toRectangle2D)
.collect(Collectors.toList()));
.stream()
.map(ManualChangesApplicationService::toRectangle2D)
.collect(Collectors.toList()));
entityToBeResized.getManualOverwrite().addChange(manualResizeRedaction);
@ -90,11 +90,17 @@ public class ManualChangesApplicationService {
if (closestEntity.isPresent()) {
copyValuesFromClosestEntity(entityToBeResized, manualResizeRedaction, closestEntity.get());
possibleEntities.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
possibleEntities.values()
.stream()
.flatMap(Collection::stream)
.forEach(TextEntity::removeFromGraph);
return;
}
possibleEntities.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
possibleEntities.values()
.stream()
.flatMap(Collection::stream)
.forEach(TextEntity::removeFromGraph);
if (node.hasParent()) {
node = node.getParent();
@ -110,14 +116,18 @@ public class ManualChangesApplicationService {
Set<SemanticNode> currentIntersectingNodes = new HashSet<>(entityToBeResized.getIntersectingNodes());
Set<SemanticNode> newIntersectingNodes = new HashSet<>(closestEntity.getIntersectingNodes());
Sets.difference(currentIntersectingNodes, newIntersectingNodes).forEach(removedNode -> removedNode.getEntities().remove(entityToBeResized));
Sets.difference(newIntersectingNodes, currentIntersectingNodes).forEach(addedNode -> addedNode.getEntities().add(entityToBeResized));
Sets.difference(currentIntersectingNodes, newIntersectingNodes)
.forEach(removedNode -> removedNode.getEntities().remove(entityToBeResized));
Sets.difference(newIntersectingNodes, currentIntersectingNodes)
.forEach(addedNode -> addedNode.getEntities().add(entityToBeResized));
Set<Page> currentIntersectingPages = new HashSet<>(entityToBeResized.getPages());
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
Sets.difference(currentIntersectingPages, newIntersectingPages).forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
Sets.difference(newIntersectingPages, currentIntersectingPages).forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
Sets.difference(currentIntersectingPages, newIntersectingPages)
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
Sets.difference(newIntersectingPages, currentIntersectingPages)
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));
@ -135,7 +145,10 @@ public class ManualChangesApplicationService {
if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) {
return;
}
var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList());
var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions()
.stream()
.map(ManualChangesApplicationService::toRectangle2D)
.toList());
image.setPosition(bBox);
image.getManualOverwrite().addChange(manualResizeRedaction);
}

View File

@ -53,10 +53,13 @@ public class NotFoundImportedEntitiesService {
if (!notFoundEntities.isEmpty()) {
// imported redactions present, intersections must be added with merged imported redactions
Map<Integer, List<PrecursorEntity>> importedRedactionsMap = mapImportedRedactionsOnPage(notFoundEntities);
entityLog.getEntityLogEntry().stream().filter(entry -> !entry.getEngines().contains(Engine.IMPORTED)).forEach(redactionLogEntry -> {
redactionLogEntry.setImportedRedactionIntersections(new HashSet<>());
addIntersections(redactionLogEntry, importedRedactionsMap, analysisNumber);
});
entityLog.getEntityLogEntry()
.stream()
.filter(entry -> !entry.getEngines().contains(Engine.IMPORTED))
.forEach(redactionLogEntry -> {
redactionLogEntry.setImportedRedactionIntersections(new HashSet<>());
addIntersections(redactionLogEntry, importedRedactionsMap, analysisNumber);
});
}
}
@ -70,7 +73,10 @@ public class NotFoundImportedEntitiesService {
.map(RectangleWithPage::pageNumber)
.collect(Collectors.toSet());
pageNumbers.forEach(pageNumber -> importedRedactionsMap.put(pageNumber,
importedEntities.stream().filter(i -> pageNumber == i.getEntityPosition().get(0).pageNumber()).collect(Collectors.toList())));
importedEntities.stream()
.filter(i -> pageNumber == i.getEntityPosition()
.get(0).pageNumber())
.collect(Collectors.toList())));
return importedRedactionsMap;
}

View File

@ -15,8 +15,12 @@ public class ComponentComparator implements Comparator<Component> {
@Override
public int compare(Component component1, Component component2) {
var firstEntity1 = component1.getReferences().stream().min(EntityComparators.first());
var firstEntity2 = component2.getReferences().stream().min(EntityComparators.first());
var firstEntity1 = component1.getReferences()
.stream()
.min(EntityComparators.first());
var firstEntity2 = component2.getReferences()
.stream()
.min(EntityComparators.first());
if (firstEntity1.isEmpty() && firstEntity2.isEmpty()) {
return 0;
}

View File

@ -40,7 +40,8 @@ public class ComponentCreationService {
private static List<Entity> findEntitiesFromLongestSection(Collection<Entity> entities) {
var entitiesBySection = entities.stream().collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
var entitiesBySection = entities.stream()
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
Optional<SemanticNode> longestSection = entitiesBySection.entrySet()
.stream()
.sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed())
@ -79,14 +80,20 @@ public class ComponentCreationService {
public void firstOrElse(String ruleIdentifier, String name, Collection<Entity> entities, String fallback) {
String valueDescription = String.format("First found value of type %s or else '%s'", joinTypes(entities), fallback);
String value = entities.stream().min(EntityComparators.first()).map(Entity::getValue).orElse(fallback);
String value = entities.stream()
.min(EntityComparators.first())
.map(Entity::getValue)
.orElse(fallback);
create(ruleIdentifier, name, value, valueDescription, entities);
}
private static String joinTypes(Collection<Entity> entities) {
return entities.stream().map(Entity::getType).distinct().collect(Collectors.joining(", "));
return entities.stream()
.map(Entity::getType)
.distinct()
.collect(Collectors.joining(", "));
}
@ -104,12 +111,12 @@ public class ComponentCreationService {
referencedEntities.addAll(references);
kieSession.insert(Component.builder()
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.name(name)
.value(value)
.valueDescription(valueDescription)
.references(new LinkedList<>(references))
.build());
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.name(name)
.value(value)
.valueDescription(valueDescription)
.references(new LinkedList<>(references))
.build());
}
@ -142,8 +149,11 @@ public class ComponentCreationService {
private static List<Entity> findEntitiesFromFirstSection(Collection<Entity> entities) {
var entitiesBySection = entities.stream().collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
Optional<SemanticNode> firstSection = entitiesBySection.keySet().stream().min(SemanticNodeComparators.first());
var entitiesBySection = entities.stream()
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
Optional<SemanticNode> firstSection = entitiesBySection.keySet()
.stream()
.min(SemanticNodeComparators.first());
if (firstSection.isEmpty()) {
return Collections.emptyList();
}
@ -188,7 +198,10 @@ public class ComponentCreationService {
public void joining(String ruleIdentifier, String name, Collection<Entity> entities, String delimiter) {
String valueDescription = String.format("Joining all values of type %s with '%s'", joinTypes(entities), delimiter);
String value = entities.stream().sorted(EntityComparators.first()).map(Entity::getValue).collect(Collectors.joining(delimiter));
String value = entities.stream()
.sorted(EntityComparators.first())
.map(Entity::getValue)
.collect(Collectors.joining(delimiter));
create(ruleIdentifier, name, value, valueDescription, entities);
}
@ -231,14 +244,20 @@ public class ComponentCreationService {
public void joiningUnique(String ruleIdentifier, String name, Collection<Entity> entities, String delimiter) {
String valueDescription = String.format("Joining all unique values of type %s with '%s'", joinTypes(entities), delimiter);
String value = entities.stream().sorted(EntityComparators.first()).map(Entity::getValue).distinct().collect(Collectors.joining(delimiter));
String value = entities.stream()
.sorted(EntityComparators.first())
.map(Entity::getValue)
.distinct()
.collect(Collectors.joining(delimiter));
create(ruleIdentifier, name, value, valueDescription, entities);
}
private static int getTotalLengthOfEntities(Map.Entry<SemanticNode, List<Entity>> entry) {
return entry.getValue().stream().mapToInt(Entity::getLength).sum();
return entry.getValue()
.stream()
.mapToInt(Entity::getLength).sum();
}
@ -293,7 +312,10 @@ public class ComponentCreationService {
*/
public void uniqueValueCount(String ruleIdentifier, String name, Collection<Entity> entities) {
long count = entities.stream().map(Entity::getValue).distinct().count();
long count = entities.stream()
.map(Entity::getValue)
.distinct()
.count();
create(ruleIdentifier, name, String.valueOf(count), "Number of unique values in the entity references", entities);
}
@ -307,18 +329,20 @@ public class ComponentCreationService {
*/
public void rowValueCount(String ruleIdentifier, String name, Collection<Entity> entities) {
entities.stream().collect(Collectors.groupingBy(this::getFirstTable)).forEach((optionalTable, groupedEntities) -> {
entities.stream()
.collect(Collectors.groupingBy(this::getFirstTable))
.forEach((optionalTable, groupedEntities) -> {
if (optionalTable.isEmpty()) {
return;
}
if (optionalTable.isEmpty()) {
return;
}
long count = groupedEntities.stream()
.collect(Collectors.groupingBy(entity -> getFirstTableCell(entity).map(TableCell::getRow).orElse(-1)))
.size();
long count = groupedEntities.stream()
.collect(Collectors.groupingBy(entity -> getFirstTableCell(entity).map(TableCell::getRow)
.orElse(-1))).size();
create(ruleIdentifier, name, String.valueOf(count), "Count rows with values in the entity references in same table", entities);
});
create(ruleIdentifier, name, String.valueOf(count), "Count rows with values in the entity references in same table", entities);
});
}
@ -334,18 +358,20 @@ public class ComponentCreationService {
if (entities.isEmpty()) {
return;
}
entities.stream().sorted(EntityComparators.first()).forEach(entity -> {
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.ENGLISH);
iterator.setText(entity.getValue());
int start = iterator.first();
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
create(ruleIdentifier,
name,
entity.getValue().substring(start, end).replaceAll("\\n", "").trim(),
String.format("Values of type '%s' as sentences", entity.getType()),
entity);
}
});
entities.stream()
.sorted(EntityComparators.first())
.forEach(entity -> {
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.ENGLISH);
iterator.setText(entity.getValue());
int start = iterator.first();
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
create(ruleIdentifier,
name,
entity.getValue().substring(start, end).replaceAll("\\n", "").trim(),
String.format("Values of type '%s' as sentences", entity.getType()),
entity);
}
});
}
@ -366,12 +392,12 @@ public class ComponentCreationService {
List<Entity> referenceList = new LinkedList<>();
referenceList.add(reference);
kieSession.insert(Component.builder()
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.name(name)
.value(value)
.valueDescription(valueDescription)
.references(referenceList)
.build());
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.name(name)
.value(value)
.valueDescription(valueDescription)
.references(referenceList)
.build());
}
@ -428,8 +454,10 @@ public class ComponentCreationService {
}
String formattedDateStrings = Stream.concat(//
dates.stream().sorted().map(date -> DateConverter.convertDate(date, resultFormat)), //
unparsedDates.stream())//
dates.stream()
.sorted()
.map(date -> DateConverter.convertDate(date, resultFormat)), //
unparsedDates.stream())//
.collect(Collectors.joining(", "));
create(ruleIdentifier, name, formattedDateStrings, valueDescription, entities);
@ -445,26 +473,34 @@ public class ComponentCreationService {
*/
public void joiningFromSameTableRow(String ruleIdentifier, String name, Collection<Entity> entities) {
String types = entities.stream().map(Entity::getType).sorted(Comparator.reverseOrder()).distinct().collect(Collectors.joining(", "));
String types = entities.stream()
.map(Entity::getType)
.sorted(Comparator.reverseOrder())
.distinct()
.collect(Collectors.joining(", "));
String valueDescription = String.format("Combine values of %s that are in same table row", types);
entities.stream().collect(Collectors.groupingBy(this::getFirstTable)).forEach((optionalTable, groupedEntities) -> {
if (optionalTable.isEmpty()) {
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
}
entities.stream()
.collect(Collectors.groupingBy(this::getFirstTable))
.forEach((optionalTable, groupedEntities) -> {
if (optionalTable.isEmpty()) {
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
}
groupedEntities.stream()
.filter(entity -> entity.getContainingNode() instanceof TableCell)
.collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow()))
.entrySet()
.stream()
.sorted(Comparator.comparingInt(Map.Entry::getKey))
.map(Map.Entry::getValue)
.forEach(entitiesInSameRow -> create(ruleIdentifier,
name,
entitiesInSameRow.stream().sorted(EntityComparators.first()).map(Entity::getValue).collect(Collectors.joining(", ")),
valueDescription,
entitiesInSameRow));
});
groupedEntities.stream()
.filter(entity -> entity.getContainingNode() instanceof TableCell)
.collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow())).entrySet()
.stream()
.sorted(Comparator.comparingInt(Map.Entry::getKey))
.map(Map.Entry::getValue)
.forEach(entitiesInSameRow -> create(ruleIdentifier,
name,
entitiesInSameRow.stream()
.sorted(EntityComparators.first())
.map(Entity::getValue)
.collect(Collectors.joining(", ")),
valueDescription,
entitiesInSameRow));
});
}
@ -521,12 +557,12 @@ public class ComponentCreationService {
public void create(String ruleIdentifier, String name, String value) {
kieSession.insert(Component.builder()
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.name(name)
.value(value)
.valueDescription("")
.references(Collections.emptyList())
.build());
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.name(name)
.value(value)
.valueDescription("")
.references(Collections.emptyList())
.build());
}
}

View File

@ -40,7 +40,9 @@ public class DocumentGraphMapper {
DocumentTree documentTree = new DocumentTree(document);
Context context = new Context(documentData, documentTree);
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages())
.map(DocumentGraphMapper::buildPage)
.toList());
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
@ -58,7 +60,9 @@ public class DocumentGraphMapper {
List<DocumentTree.Entry> newEntries = new LinkedList<>();
for (DocumentStructure.EntryData entryData : entries) {
List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
.map(pageNumber -> getPage(pageNumber, context))
.toList();
SemanticNode node = switch (entryData.getType()) {
case SECTION -> buildSection(context);
@ -76,8 +80,10 @@ public class DocumentGraphMapper {
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
node.setLeafTextBlock(textBlock);
}
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed().toList();
entryData.getEngines().forEach(engine -> node.addEngine(engine));
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
.toList();
entryData.getEngines()
.forEach(engine -> node.addEngine(engine));
node.setTreeId(treeId);
switch (entryData.getType()) {
@ -150,16 +156,18 @@ public class DocumentGraphMapper {
private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) {
return Arrays.stream(atomicTextBlockIds).map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId)).collect(new TextBlockCollector());
return Arrays.stream(atomicTextBlockIds)
.map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId))
.collect(new TextBlockCollector());
}
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
}
@ -190,8 +198,10 @@ public class DocumentGraphMapper {
this.documentTree = documentTree;
this.pageData = new LinkedList<>();
this.documentTextData = Arrays.stream(documentData.getDocumentTextData()).toList();
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData()).toList();
this.documentTextData = Arrays.stream(documentData.getDocumentTextData())
.toList();
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData())
.toList();
}

View File

@ -11,6 +11,7 @@ public abstract class EntityComparators implements Comparator<Entity> {
return new FirstEntity();
}
public static class LongestEntity implements Comparator<Entity> {
@Override
@ -27,6 +28,7 @@ public abstract class EntityComparators implements Comparator<Entity> {
return new LongestEntity();
}
public static class FirstEntity implements Comparator<Entity> {
@Override

View File

@ -276,7 +276,8 @@ public class EntityCreationService {
"this is some text. a here is more text" and "here is more text". We only want to keep the latter.
*/
return entityTextRanges.stream()
.filter(boundary -> entityTextRanges.stream().noneMatch(innerBoundary -> !innerBoundary.equals(boundary) && innerBoundary.containedBy(boundary)))
.filter(boundary -> entityTextRanges.stream()
.noneMatch(innerBoundary -> !innerBoundary.equals(boundary) && innerBoundary.containedBy(boundary)))
.toList();
}
@ -351,10 +352,10 @@ public class EntityCreationService {
return tableNode.streamTableCells()
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findTextRangesByString(string, tableCell.getTextBlock()),
tableCell,
type,
entityType,
tableNode));
tableCell,
type,
entityType,
tableNode));
}
@ -362,10 +363,10 @@ public class EntityCreationService {
return tableNode.streamTableCells()
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, tableCell.getTextBlock()),
tableCell,
type,
entityType,
tableNode));
tableCell,
type,
entityType,
tableNode));
}
@ -500,7 +501,10 @@ public class EntityCreationService {
public Stream<TextEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get);
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
.map(semanticNode -> bySemanticNode(semanticNode, type, entityType))
.filter(Optional::isPresent)
.map(Optional::get);
}
@ -592,7 +596,11 @@ public class EntityCreationService {
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node);
if (node.getEntities().contains(entity)) {
Optional<TextEntity> optionalTextEntity = node.getEntities().stream().filter(e -> e.equals(entity) && e.type().equals(type)).peek(e -> e.addEngines(engines)).findAny();
Optional<TextEntity> optionalTextEntity = node.getEntities()
.stream()
.filter(e -> e.equals(entity) && e.type().equals(type))
.peek(e -> e.addEngines(engines))
.findAny();
if (optionalTextEntity.isEmpty()) {
return optionalTextEntity; // Entity has been recategorized and should not be created at all.
}
@ -647,17 +655,27 @@ public class EntityCreationService {
return entitiesToMerge.get(0);
}
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream().map(TextEntity::getTextRange).toList()), type, entityType, node);
mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet()));
entitiesToMerge.stream().map(TextEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream()
.map(TextEntity::getTextRange)
.toList()), type, entityType, node);
mergedEntity.addEngines(entitiesToMerge.stream()
.flatMap(entityNode -> entityNode.getEngines()
.stream())
.collect(Collectors.toSet()));
entitiesToMerge.stream()
.map(TextEntity::getMatchedRuleList)
.flatMap(Collection::stream)
.forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
entitiesToMerge.stream()
.map(TextEntity::getManualOverwrite)
.map(ManualChangeOverwrite::getManualChangeLog)
.flatMap(Collection::stream)
.forEach(manualChange -> mergedEntity.getManualOverwrite().addChange(manualChange));
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDossierDictionaryEntry));
mergedEntity.setDictionaryEntry(entitiesToMerge.stream()
.anyMatch(TextEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream()
.anyMatch(TextEntity::isDossierDictionaryEntry));
addEntityToGraph(mergedEntity, node);
insertToKieSession(mergedEntity);
@ -667,7 +685,8 @@ public class EntityCreationService {
public Stream<TextEntity> copyEntities(List<TextEntity> entities, String type, EntityType entityType, SemanticNode node) {
return entities.stream().map(entity -> copyEntity(entity, type, entityType, node));
return entities.stream()
.map(entity -> copyEntity(entity, type, entityType, node));
}
@ -744,7 +763,8 @@ public class EntityCreationService {
try {
if (node.getEntities().contains(entity)) {
// If entity already exists and it has a different text range, we add the text range to the list of duplicated text ranges
node.getEntities().stream()//
node.getEntities()
.stream()//
.filter(e -> e.equals(entity))//
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
.findAny()//
@ -770,8 +790,10 @@ public class EntityCreationService {
SemanticNode deepestSharedNode = entityToDuplicate.getIntersectingNodes()
.stream()
.sorted(Comparator.comparingInt(n -> -n.getTreeId().size()))
.filter(intersectingNode -> entityToDuplicate.getDuplicateTextRanges().stream().allMatch(tr -> intersectingNode.getTextRange().contains(tr)) && //
intersectingNode.getTextRange().contains(entityToDuplicate.getTextRange()))
.filter(intersectingNode -> entityToDuplicate.getDuplicateTextRanges()
.stream()
.allMatch(tr -> intersectingNode.getTextRange().contains(tr)) && //
intersectingNode.getTextRange().contains(entityToDuplicate.getTextRange()))
.findFirst()
.orElse(node.getDocumentTree().getRoot().getNode());
@ -784,7 +806,8 @@ public class EntityCreationService {
return;
}
additionalIntersectingNode.getEntities().add(entityToDuplicate);
additionalIntersectingNode.getPages(newTextRange).forEach(page -> page.getEntities().add(entityToDuplicate));
additionalIntersectingNode.getPages(newTextRange)
.forEach(page -> page.getEntities().add(entityToDuplicate));
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
});
}
@ -806,5 +829,4 @@ public class EntityCreationService {
addEntityToNodeEntitySets(entity);
}
}

View File

@ -11,7 +11,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBl
public class EntityCreationUtility {
public static void checkIfBothStartAndEndAreEmpty(String start, String end) {
checkIfBothStartAndEndAreEmpty(List.of(start), List.of(end));
@ -57,7 +56,8 @@ public class EntityCreationUtility {
public static void addEntityToNodeEntitySets(TextEntity entity) {
entity.getIntersectingNodes().forEach(node -> node.getEntities().add(entity));
entity.getIntersectingNodes()
.forEach(node -> node.getEntities().add(entity));
}

View File

@ -59,7 +59,9 @@ public class EntityEnrichmentService {
private static List<String> splitToWordsAndRemoveEmptyWords(String textAfter) {
return Arrays.stream(textAfter.split(" ")).filter(word -> !Objects.equals("", word)).toList();
return Arrays.stream(textAfter.split(" "))
.filter(word -> !Objects.equals("", word))
.toList();
}

View File

@ -47,7 +47,9 @@ public class EntityFindingUtility {
}
public Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity, Map<String, List<TextEntity>> entitiesWithSameValue, double matchThreshold) {
public Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity,
Map<String, List<TextEntity>> entitiesWithSameValue,
double matchThreshold) {
if (precursorEntity.getValue() == null) {
return Optional.empty();
@ -73,11 +75,15 @@ public class EntityFindingUtility {
ClosestEntity closestEntity = optionalClosestEntity.get();
if (closestEntity.getDistance() > matchThreshold) {
log.warn("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}",
precursorEntity.getValue(),
precursorEntity.getEntityPosition().get(0).pageNumber(),
precursorEntity.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(),
closestEntity.getDistance(),
matchThreshold);
precursorEntity.getValue(),
precursorEntity.getEntityPosition()
.get(0).pageNumber(),
precursorEntity.getEntityPosition()
.stream()
.map(RectangleWithPage::rectangle2D)
.toList(),
closestEntity.getDistance(),
matchThreshold);
return Optional.empty();
}
@ -93,8 +99,14 @@ public class EntityFindingUtility {
private static boolean pagesMatch(TextEntity entity, List<RectangleWithPage> originalPositions) {
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet());
Set<Integer> originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet());
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage()
.stream()
.map(PositionOnPage::getPage)
.map(Page::getNumber)
.collect(Collectors.toSet());
Set<Integer> originalPageNumbers = originalPositions.stream()
.map(RectangleWithPage::pageNumber)
.collect(Collectors.toSet());
return entityPageNumbers.containsAll(originalPageNumbers);
}
@ -105,15 +117,16 @@ public class EntityFindingUtility {
return Double.MAX_VALUE;
}
return originalPositions.stream()
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D()))
.average()
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average()
.orElse(Double.MAX_VALUE);
}
private static long countRectangles(TextEntity entity) {
return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
return entity.getPositionsOnPagePerPage()
.stream()
.mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
}
@ -144,22 +157,33 @@ public class EntityFindingUtility {
double maxY2 = Math.max(rectangle2.getMinY(), rectangle2.getMaxY());
return Math.abs(minX1 - minX2) //
+ Math.abs(minY1 - minY2) //
+ Math.abs(maxX1 - maxX2) //
+ Math.abs(maxY1 - maxY2);
+ Math.abs(minY1 - minY2) //
+ Math.abs(maxX1 - maxX2) //
+ Math.abs(maxY1 - maxY2);
}
public Map<String, List<TextEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, List<PrecursorEntity> manualEntities) {
Set<Integer> pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
Set<String> entryValues = manualEntities.stream().map(PrecursorEntity::getValue).filter(Objects::nonNull).map(String::toLowerCase).collect(Collectors.toSet());
Set<Integer> pageNumbers = manualEntities.stream()
.flatMap(entry -> entry.getEntityPosition()
.stream()
.map(RectangleWithPage::pageNumber))
.collect(Collectors.toSet());
Set<String> entryValues = manualEntities.stream()
.map(PrecursorEntity::getValue)
.filter(Objects::nonNull)
.map(String::toLowerCase)
.collect(Collectors.toSet());
if (!pageNumbers.stream().allMatch(node::onPage)) {
if (!pageNumbers.stream()
.allMatch(node::onPage)) {
throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
node,
pageNumbers.stream().filter(pageNumber -> !node.onPage(pageNumber)).toList(),
node.getPages()));
node,
pageNumbers.stream()
.filter(pageNumber -> !node.onPage(pageNumber))
.toList(),
node.getPages()));
}
SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);

View File

@ -52,9 +52,13 @@ public class ImportedRedactionEntryService {
private List<BaseAnnotation> allManualChangesExceptAdd(ManualRedactions manualRedactions) {
return Stream.of(manualRedactions.getForceRedactions(),
manualRedactions.getResizeRedactions(),
manualRedactions.getRecategorizations(),
manualRedactions.getIdsToRemove(),
manualRedactions.getLegalBasisChanges()).flatMap(Collection::stream).map(baseAnnotation -> (BaseAnnotation) baseAnnotation).toList();
manualRedactions.getResizeRedactions(),
manualRedactions.getRecategorizations(),
manualRedactions.getIdsToRemove(),
manualRedactions.getLegalBasisChanges())
.flatMap(Collection::stream)
.map(baseAnnotation -> (BaseAnnotation) baseAnnotation)
.toList();
}
}

View File

@ -14,12 +14,14 @@ public class IntersectingNodeVisitor implements NodeVisitor {
private Set<SemanticNode> intersectingNodes;
private final TextRange textRange;
public IntersectingNodeVisitor(TextRange textRange) {
this.textRange = textRange;
this.intersectingNodes = new HashSet<>();
}
@Override
public void visit(SemanticNode node) {

View File

@ -31,7 +31,8 @@ public class ManualRedactionEntryService {
List<PrecursorEntity> notFoundManualRedactionEntries = Collections.emptyList();
if (analyzeRequest.getManualRedactions() != null) {
notFoundManualRedactionEntries = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(analyzeRequest.getManualRedactions(),
document, dossierTemplateId);
document,
dossierTemplateId);
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
}
@ -51,10 +52,13 @@ public class ManualRedactionEntryService {
private List<BaseAnnotation> allManualChangesExceptAdd(ManualRedactions manualRedactions) {
return Stream.of(manualRedactions.getForceRedactions(),
manualRedactions.getResizeRedactions(),
manualRedactions.getRecategorizations(),
manualRedactions.getIdsToRemove(),
manualRedactions.getLegalBasisChanges()).flatMap(Collection::stream).map(baseAnnotation -> (BaseAnnotation) baseAnnotation).toList();
manualRedactions.getResizeRedactions(),
manualRedactions.getRecategorizations(),
manualRedactions.getIdsToRemove(),
manualRedactions.getLegalBasisChanges())
.flatMap(Collection::stream)
.map(baseAnnotation -> (BaseAnnotation) baseAnnotation)
.toList();
}
}

View File

@ -44,9 +44,11 @@ public class NerEntitiesAdapter {
public NerEntities toNerEntities(NerEntitiesModel nerEntitiesModel, Document document) {
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSections(document),
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
nerEntityModel.getType())).toList());
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
new TextRange(nerEntityModel.getStartOffset(),
nerEntityModel.getEndOffset()),
nerEntityModel.getType()))
.toList());
}
@ -83,7 +85,9 @@ public class NerEntitiesAdapter {
List<List<NerEntities.NerEntity>> entityClusters = new LinkedList<>();
List<NerEntities.NerEntity> startEntitiesOfEssentialType = sortedEntities.stream().filter(e -> essentialTypes.contains(e.type())).toList();
List<NerEntities.NerEntity> startEntitiesOfEssentialType = sortedEntities.stream()
.filter(e -> essentialTypes.contains(e.type()))
.toList();
for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) {
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
entityClusters.add(currentCluster);
@ -105,7 +109,10 @@ public class NerEntitiesAdapter {
}
}
return entityClusters.stream().filter(cluster -> cluster.size() >= minPartsToCombine).map(NerEntitiesAdapter::toContainingBoundary).distinct();
return entityClusters.stream()
.filter(cluster -> cluster.size() >= minPartsToCombine)
.map(NerEntitiesAdapter::toContainingBoundary)
.distinct();
}
@ -124,17 +131,18 @@ public class NerEntitiesAdapter {
public Stream<TextRange> combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) {
return combineNerEntities(entityRecognitionEntities,
CBI_ADDRESS_ESSENTIAL_TYPES,
CBI_ADDRESS_TYPES_TO_COMBINE,
MAX_DISTANCE_BETWEEN_PARTS,
MIN_PARTS_TO_COMBINE,
ALLOW_DUPLICATES);
CBI_ADDRESS_ESSENTIAL_TYPES,
CBI_ADDRESS_TYPES_TO_COMBINE,
MAX_DISTANCE_BETWEEN_PARTS,
MIN_PARTS_TO_COMBINE,
ALLOW_DUPLICATES);
}
private static boolean isDuplicate(List<NerEntities.NerEntity> currentCluster, NerEntities.NerEntity entity, boolean allowDuplicates) {
return allowDuplicates || currentCluster.stream().anyMatch(e -> e.type().equals(entity.type()));
return allowDuplicates || currentCluster.stream()
.anyMatch(e -> e.type().equals(entity.type()));
}
@ -146,24 +154,34 @@ public class NerEntitiesAdapter {
private static TextRange toContainingBoundary(List<NerEntities.NerEntity> nerEntities) {
return TextRange.merge(nerEntities.stream().map(NerEntities.NerEntity::textRange).toList());
return TextRange.merge(nerEntities.stream()
.map(NerEntities.NerEntity::textRange)
.toList());
}
private static Stream<EntityRecognitionEntity> addOffsetsAndFlatten(List<Integer> stringOffsetsForMainSections, NerEntitiesModel nerEntitiesModel) {
nerEntitiesModel.getData().forEach((sectionNumber, listOfNerEntities) -> listOfNerEntities.forEach(entityRecognitionEntity -> {
int newStartOffset = entityRecognitionEntity.getStartOffset() + stringOffsetsForMainSections.get(sectionNumber);
entityRecognitionEntity.setStartOffset(newStartOffset);
entityRecognitionEntity.setEndOffset(newStartOffset + entityRecognitionEntity.getValue().length());
}));
return nerEntitiesModel.getData().values().stream().flatMap(Collection::stream);
nerEntitiesModel.getData()
.forEach((sectionNumber, listOfNerEntities) -> listOfNerEntities.forEach(entityRecognitionEntity -> {
int newStartOffset = entityRecognitionEntity.getStartOffset() + stringOffsetsForMainSections.get(sectionNumber);
entityRecognitionEntity.setStartOffset(newStartOffset);
entityRecognitionEntity.setEndOffset(newStartOffset + entityRecognitionEntity.getValue().length());
}));
return nerEntitiesModel.getData().values()
.stream()
.flatMap(Collection::stream);
}
private static List<Integer> getStringStartOffsetsForMainSections(Document document) {
return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getTextRange).map(TextRange::start).toList();
return document.getMainSections()
.stream()
.map(Section::getTextBlock)
.map(TextBlock::getTextRange)
.map(TextRange::start)
.toList();
}
}

View File

@ -5,4 +5,5 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
public interface NodeVisitor {
void visit(SemanticNode node);
}

View File

@ -43,7 +43,9 @@ public class PropertiesMapper {
private Rectangle2D parseRectangle2D(String bBox) {
List<Float> floats = Arrays.stream(bBox.split(DocumentStructure.RECTANGLE_DELIMITER)).map(Float::parseFloat).toList();
List<Float> floats = Arrays.stream(bBox.split(DocumentStructure.RECTANGLE_DELIMITER))
.map(Float::parseFloat)
.toList();
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
}

View File

@ -12,6 +12,7 @@ public abstract class SemanticNodeComparators implements Comparator<SemanticNode
return new FirstSemanticNode();
}
public static class FirstSemanticNode extends SemanticNodeComparators {
@Override

View File

@ -50,7 +50,9 @@ public class ComponentDroolsExecutionService {
.filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED))
.map(entry -> Entity.fromEntityLogEntry(entry, document))
.forEach(kieSession::insert);
fileAttributes.stream().filter(f -> f.getValue() != null).forEach(kieSession::insert);
fileAttributes.stream()
.filter(f -> f.getValue() != null)
.forEach(kieSession::insert);
CompletableFuture<Void> completableFuture = CompletableFuture.supplyAsync(() -> {
kieSession.fireAllRules();
@ -58,7 +60,8 @@ public class ComponentDroolsExecutionService {
});
try {
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS).get();
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS)
.get();
} catch (ExecutionException e) {
kieSession.dispose();
if (e.getCause() instanceof TimeoutException) {
@ -71,7 +74,9 @@ public class ComponentDroolsExecutionService {
}
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
List<Component> components = getComponents(kieSession).stream().sorted(ComponentComparator.first()).toList();
List<Component> components = getComponents(kieSession).stream()
.sorted(ComponentComparator.first())
.toList();
kieSession.dispose();
return components;
}

View File

@ -60,6 +60,7 @@ public class DroolsSyntaxValidationService {
return droolsCompilerSyntaxValidation;
}
private DroolsSyntaxDeprecatedWarnings getWarningsForDeprecatedImports(RuleFileBluePrint ruleFileBluePrint) {
if (!deprecatedElementsFinder.getDeprecatedClasses().isEmpty()) {
@ -70,13 +71,13 @@ public class DroolsSyntaxValidationService {
String sb = "Following imports are deprecated: \n" + matches.stream()
.map(m -> imports.substring(m.startIndex(), m.endIndex()))
.collect(Collectors.joining("\n"));
return DroolsSyntaxDeprecatedWarnings.builder().line(ruleFileBluePrint.getImportLine()).column(0).message(sb)
.build();
return DroolsSyntaxDeprecatedWarnings.builder().line(ruleFileBluePrint.getImportLine()).column(0).message(sb).build();
}
}
return null;
}
private List<DroolsSyntaxDeprecatedWarnings> getWarningsForDeprecatedRules(RuleFileBluePrint ruleFileBluePrint) {
List<DroolsSyntaxDeprecatedWarnings> warningMessages = new ArrayList<>();
@ -96,8 +97,7 @@ public class DroolsSyntaxValidationService {
.distinct()
.map(dm -> String.format("Method %s might be deprecated because of \n %s \n", dm, deprecatedMethodsSignatureMap.get(dm)))
.collect(Collectors.joining("\n"));
warningMessages.add(DroolsSyntaxDeprecatedWarnings.builder().line(basicRule.getLine()).column(0).message(warningMessage)
.build());
warningMessages.add(DroolsSyntaxDeprecatedWarnings.builder().line(basicRule.getLine()).column(0).message(warningMessage).build());
}
}
}
@ -160,7 +160,10 @@ public class DroolsSyntaxValidationService {
String requiredAgendaGroup = "LOCAL_DICTIONARY_ADDS";
if (!validateAgendaGroupIsPresent(ruleFileBluePrint, requiredAgendaGroup)) {
customSyntaxValidation.getDroolsSyntaxErrorMessages()
.add(DroolsSyntaxErrorMessage.builder().line(0).column(0).message(String.format("At least one rule with Agenda-Group '%s' required!", requiredAgendaGroup))
.add(DroolsSyntaxErrorMessage.builder()
.line(0)
.column(0)
.message(String.format("At least one rule with Agenda-Group '%s' required!", requiredAgendaGroup))
.build());
}
}
@ -214,15 +217,13 @@ public class DroolsSyntaxValidationService {
List<DroolsSyntaxErrorMessage> droolsSyntaxErrorMessages = errorMessages.stream()
.map(this::buildDroolsSyntaxErrorMessage)
.collect(Collectors.toList());
return DroolsSyntaxValidation.builder().droolsSyntaxErrorMessages(droolsSyntaxErrorMessages)
.build();
return DroolsSyntaxValidation.builder().droolsSyntaxErrorMessages(droolsSyntaxErrorMessages).build();
}
private DroolsSyntaxErrorMessage buildDroolsSyntaxErrorMessage(Message message) {
return DroolsSyntaxErrorMessage.builder().line(message.getLine()).column(message.getColumn()).message(message.getText())
.build();
return DroolsSyntaxErrorMessage.builder().line(message.getLine()).column(message.getColumn()).message(message.getText()).build();
}
}

View File

@ -30,8 +30,7 @@ public class KieContainerCreationService {
private final RulesClient rulesClient;
@Observed(name = "KieContainerCreationService",
contextualName = "get-kie-container")
@Observed(name = "KieContainerCreationService", contextualName = "get-kie-container")
public KieWrapper getLatestKieContainer(String dossierTemplateId, RuleFileType ruleFileType) {
try {

View File

@ -52,7 +52,12 @@ public class RuleFileParser {
}
}
String imports = ruleString.substring(0, packageDescr.getImports().stream().mapToInt(ImportDescr::getEndCharacter).max().orElseThrow() + 1);
String imports = ruleString.substring(0,
packageDescr.getImports()
.stream()
.mapToInt(ImportDescr::getEndCharacter)
.max()
.orElseThrow() + 1);
String globals = packageDescr.getGlobals()
.stream()
.map(globalDescr -> ruleString.substring(globalDescr.getStartCharacter(), globalDescr.getEndCharacter()))
@ -61,11 +66,20 @@ public class RuleFileParser {
List<RuleClass> ruleClasses = buildRuleClasses(allRules);
return new RuleFileBluePrint(imports.trim(),
packageDescr.getImports().stream().findFirst().map(ImportDescr::getLine).orElse(0),
globals.trim(),
packageDescr.getGlobals().stream().findFirst().map(GlobalDescr::getLine).orElse(0), allQueries,
ruleClasses,
customDroolsSyntaxValidation);
packageDescr.getImports()
.stream()
.findFirst()
.map(ImportDescr::getLine)
.orElse(0),
globals.trim(),
packageDescr.getGlobals()
.stream()
.findFirst()
.map(GlobalDescr::getLine)
.orElse(0),
allQueries,
ruleClasses,
customDroolsSyntaxValidation);
}
@ -91,11 +105,12 @@ public class RuleFileParser {
Matcher matcher = ruleIdentifierInCodeFinder.matcher(code);
while (matcher.find()) {
String identifierInCode = code.substring(matcher.start(1), matcher.end(1));
long line = code.substring(0, matcher.start(1)).lines().count() + lineOffset - 1;
long line = code.substring(0, matcher.start(1)).lines()
.count() + lineOffset - 1;
if (!identifier.equals(identifierInCode)) {
customDroolsSyntaxValidation.addErrorMessage((int) line,
0,
String.format("Rule identifier %s is not equal to rule identifier %s in rule name!", identifierInCode, identifier));
0,
String.format("Rule identifier %s is not equal to rule identifier %s in rule name!", identifierInCode, identifier));
}
}
}
@ -104,23 +119,35 @@ public class RuleFileParser {
private void addDuplicateRuleIdentifierErrorMessage(RuleDescr rule, BasicRule basicRule, DroolsSyntaxValidation customDroolsSyntaxValidation) {
customDroolsSyntaxValidation.addErrorMessage(rule.getLine(),
rule.getColumn(),
String.format("RuleIdentifier: %s is a duplicate, duplicates are not allowed!", basicRule.getIdentifier()));
rule.getColumn(),
String.format("RuleIdentifier: %s is a duplicate, duplicates are not allowed!", basicRule.getIdentifier()));
}
private List<RuleClass> buildRuleClasses(List<BasicRule> allRules) {
List<RuleType> ruleTypeOrder = allRules.stream().map(BasicRule::getIdentifier).map(RuleIdentifier::type).distinct().toList();
Map<RuleType, List<BasicRule>> rulesPerType = allRules.stream().collect(groupingBy(rule -> rule.getIdentifier().type()));
return ruleTypeOrder.stream().map(type -> new RuleClass(type, groupingByGroup(rulesPerType.get(type)))).collect(Collectors.toList());
List<RuleType> ruleTypeOrder = allRules.stream()
.map(BasicRule::getIdentifier)
.map(RuleIdentifier::type)
.distinct()
.toList();
Map<RuleType, List<BasicRule>> rulesPerType = allRules.stream()
.collect(groupingBy(rule -> rule.getIdentifier().type()));
return ruleTypeOrder.stream()
.map(type -> new RuleClass(type, groupingByGroup(rulesPerType.get(type))))
.collect(Collectors.toList());
}
private List<RuleUnit> groupingByGroup(List<BasicRule> rules) {
Map<Integer, List<BasicRule>> rulesPerUnit = rules.stream().collect(groupingBy(rule -> rule.getIdentifier().unit()));
return rulesPerUnit.keySet().stream().sorted().map(unit -> new RuleUnit(unit, rulesPerUnit.get(unit))).collect(Collectors.toList());
Map<Integer, List<BasicRule>> rulesPerUnit = rules.stream()
.collect(groupingBy(rule -> rule.getIdentifier().unit()));
return rulesPerUnit.keySet()
.stream()
.sorted()
.map(unit -> new RuleUnit(unit, rulesPerUnit.get(unit)))
.collect(Collectors.toList());
}
}

View File

@ -16,6 +16,7 @@ public class ObservedStorageService {
@Observed(name = "RedactionStorageService", contextualName = "get-document-data")
public DocumentData getDocumentData(String dossierId, String fileId) {
return redactionStorageService.getDocumentData(dossierId, fileId);
}

View File

@ -80,9 +80,12 @@ public class RedactionStorageService {
try {
ImportedRedactionsPerPage importedRedactionsPerPage = storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
ImportedRedactionsPerPage.class);
return new ImportedRedactions(importedRedactionsPerPage.getImportedRedactions().values().stream().flatMap(List::stream).collect(Collectors.toList()));
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
ImportedRedactionsPerPage.class);
return new ImportedRedactions(importedRedactionsPerPage.getImportedRedactions().values()
.stream()
.flatMap(List::stream)
.collect(Collectors.toList()));
} catch (StorageObjectDoesNotExist e) {
log.debug("Imported redactions not available.");
return new ImportedRedactions();
@ -90,14 +93,13 @@ public class RedactionStorageService {
}
@Timed("redactmanager_getImportedRedactions")
public ImportedRedactionsPerPage getImportedRedactionsPerPage(String dossierId, String fileId) {
try {
return storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
ImportedRedactionsPerPage.class);
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
ImportedRedactionsPerPage.class);
} catch (StorageObjectDoesNotExist e) {
log.debug("Imported redactions not available.");
return null;
@ -111,12 +113,12 @@ public class RedactionStorageService {
try {
RedactionLog redactionLog = storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG),
RedactionLog.class);
StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG),
RedactionLog.class);
redactionLog.setRedactionLogEntry(redactionLog.getRedactionLogEntry()
.stream()
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
.collect(Collectors.toList()));
.stream()
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
.collect(Collectors.toList()));
return redactionLog;
} catch (StorageObjectDoesNotExist e) {
log.debug("RedactionLog not available.");
@ -132,9 +134,9 @@ public class RedactionStorageService {
try {
EntityLog entityLog = storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG), EntityLog.class);
entityLog.setEntityLogEntry(entityLog.getEntityLogEntry()
.stream()
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
.collect(Collectors.toList()));
.stream()
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
.collect(Collectors.toList()));
return entityLog;
} catch (StorageObjectDoesNotExist e) {
log.debug("EntityLog not available.");
@ -156,17 +158,17 @@ public class RedactionStorageService {
try {
return DocumentData.builder()
.documentStructure(storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
DocumentStructure.class))
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
DocumentStructure.class))
.documentTextData(storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
DocumentTextData[].class))
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
DocumentTextData[].class))
.documentPositionData(storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION),
DocumentPositionData[].class))
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION),
DocumentPositionData[].class))
.documentPages(storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
DocumentPage[].class))
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
DocumentPage[].class))
.build();
} catch (StorageObjectDoesNotExist e) {
log.debug("DocumentData not available.");

View File

@ -11,6 +11,7 @@ public class RuleManagementResources {
private static final String folderPrefix = "drools";
@SneakyThrows
public static InputStream getBaseRuleFileInputStream() {
@ -26,6 +27,7 @@ public class RuleManagementResources {
}
}
@SneakyThrows
public static InputStream getBaseComponentRuleFileInputStream() {

View File

@ -18,11 +18,11 @@ import lombok.extern.slf4j.Slf4j;
public class DateConverter {
static List<SimpleDateFormat> formats = List.of(new SimpleDateFormat("dd MMM yy", Locale.ENGLISH),
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
public Optional<Date> parseDate(String dateAsString) {

View File

@ -21,7 +21,9 @@ public final class IdBuilder {
public String buildId(Set<Page> pages, List<Rectangle2D> rectanglesPerLine, String type, String entityType) {
return buildId(pages.stream().map(Page::getNumber).collect(Collectors.toList()), rectanglesPerLine, type, entityType);
return buildId(pages.stream()
.map(Page::getNumber)
.collect(Collectors.toList()), rectanglesPerLine, type, entityType);
}
@ -29,7 +31,9 @@ public final class IdBuilder {
StringBuilder sb = new StringBuilder();
sb.append(type).append(entityType);
List<Integer> sortedPageNumbers = pageNumbers.stream().sorted(Comparator.comparingInt(Integer::intValue)).toList();
List<Integer> sortedPageNumbers = pageNumbers.stream()
.sorted(Comparator.comparingInt(Integer::intValue))
.toList();
sortedPageNumbers.forEach(sb::append);
rectanglesPerLine.forEach(rectangle2D -> sb.append(Math.round(rectangle2D.getX()))
.append(Math.round(rectangle2D.getY()))

View File

@ -22,19 +22,25 @@ public class RectangleTransformations {
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
return atomicTextBlocks.stream()
.flatMap(atomicTextBlock -> atomicTextBlock.getPositions()
.stream())
.collect(new Rectangle2DBBoxCollector());
}
public static Rectangle2D rectangleBBox(List<Position> positions) {
return positions.stream().map(Position::toRectangle2D).collect(new Rectangle2DBBoxCollector());
return positions.stream()
.map(Position::toRectangle2D)
.collect(new Rectangle2DBBoxCollector());
}
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());
return rectangle2DList.stream()
.collect(new Rectangle2DBBoxCollector());
}
@ -49,7 +55,9 @@ public class RectangleTransformations {
if (rectangle2DList.isEmpty()) {
return Collections.emptyList();
}
double splitThreshold = rectangle2DList.stream().mapToDouble(RectangularShape::getWidth).average().orElse(5) * 5.0;
double splitThreshold = rectangle2DList.stream()
.mapToDouble(RectangularShape::getWidth).average()
.orElse(5) * 5.0;
List<List<Rectangle2D>> rectangleListsWithGaps = new LinkedList<>();
List<Rectangle2D> rectangleListWithoutGaps = new LinkedList<>();
@ -66,7 +74,9 @@ public class RectangleTransformations {
previousRectangle = currentRectangle;
}
}
return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangle2DBBox).toList();
return rectangleListsWithGaps.stream()
.map(RectangleTransformations::rectangle2DBBox)
.toList();
}
@ -96,9 +106,9 @@ public class RectangleTransformations {
public BinaryOperator<BBox> combiner() {
return (b1, b2) -> new BBox(Math.min(b1.lowerLeftX, b2.lowerLeftX),
Math.min(b1.lowerLeftY, b2.lowerLeftY),
Math.max(b1.upperRightX, b2.upperRightX),
Math.max(b1.upperRightY, b2.upperRightY));
Math.min(b1.lowerLeftY, b2.lowerLeftY),
Math.max(b1.upperRightX, b2.upperRightX),
Math.max(b1.upperRightY, b2.upperRightY));
}

View File

@ -89,7 +89,10 @@ public class RedactionSearchUtility {
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineTextRange).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList();
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
.map(textBlock::getLineTextRange)
.filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
.toList();
if (lineBoundaries.isEmpty()) {
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
}
@ -132,6 +135,7 @@ public class RedactionSearchUtility {
return getTextRangesByPatternWithLineBreaks(textBlock, group, pattern);
}
public static List<TextRange> findTextRangesByRegexIgnoreCase(String regexPattern, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
@ -185,19 +189,21 @@ public class RedactionSearchUtility {
return getTextRangesByPattern(textBlock, 0, pattern);
}
public static List<TextRange> findTextRangesByList(List<String> searchList, TextBlock textBlock) {
List<TextRange> boundaries = new LinkedList<>();
for (var searchString: searchList) {
for (var searchString : searchList) {
boundaries.addAll(findTextRangesByString(searchString, textBlock));
}
return boundaries;
}
public static List<TextRange> findTextRangesByListIgnoreCase(List<String> searchList, TextBlock textBlock) {
List<TextRange> boundaries = new LinkedList<>();
for (var searchString: searchList) {
for (var searchString : searchList) {
boundaries.addAll(findTextRangesByStringIgnoreCase(searchString, textBlock));
}
return boundaries;

View File

@ -20,7 +20,8 @@ public final class ResourceLoader {
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath);
}
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
return br.lines().collect(Collectors.toSet());
return br.lines()
.collect(Collectors.toSet());
} catch (IOException e) {
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e);
}

View File

@ -54,16 +54,16 @@ public final class SeparatorUtils {
private static boolean validateEnd(TextBlock textBlock, TextRange textRange) {
return textRange.end() == textBlock.getTextRange().end() ||//
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
}
private static boolean validateStart(TextBlock textBlock, TextRange textRange) {
return textRange.start() == textBlock.getTextRange().start() ||//
SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start()));
SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start()));
}
}

View File

@ -172,7 +172,7 @@ public abstract class AbstractRedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(HINT_ONLY_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(HINT_ONLY_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(MUST_REDACT_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
false));
false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(TEST_METHOD_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(PII_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_PII, false));
when(dictionaryClient.getDictionaryForType(PURITY_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PURITY_INDICATOR, false));
@ -183,9 +183,9 @@ public abstract class AbstractRedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(FORMULA_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(FORMULA_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ROTATE_SIMPLE_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DOSSIER_REDACTIONS_INDICATOR,
true));
true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
true));
true));
}
@ -193,45 +193,102 @@ public abstract class AbstractRedactionIntegrationTest {
protected void loadDictionaryForTest() {
dictionary.computeIfAbsent(DICTIONARY_AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(DICTIONARY_SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATE_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(DICTIONARY_ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(HINT_ONLY_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/hint_only.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PUBLISHED_INFORMATION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/published_information.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(TEST_METHOD_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/test_method.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/PII.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/purity.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -240,7 +297,10 @@ public abstract class AbstractRedactionIntegrationTest {
dictionary.clear();
dictionary.computeIfAbsent(ROTATE_SIMPLE_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -373,8 +433,8 @@ public abstract class AbstractRedactionIntegrationTest {
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
}
@ -424,7 +484,9 @@ public abstract class AbstractRedactionIntegrationTest {
if (entries == null) {
return Collections.emptyList();
}
return entries.stream().map(this::toDictionaryEntry).collect(Collectors.toList());
return entries.stream()
.map(this::toDictionaryEntry)
.collect(Collectors.toList());
}
@ -480,11 +542,11 @@ public abstract class AbstractRedactionIntegrationTest {
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
cvServiceResponseFileStream);
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
cvServiceResponseFileStream);
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO),
imageServiceResponseStream);
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO),
imageServiceResponseStream);
return request;

View File

@ -104,15 +104,15 @@ public class AnalysisTest extends AbstractRedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);

View File

@ -82,18 +82,18 @@ public class DictionaryServiceTest {
when(dictionaryClient.getVersion(anyString())).thenReturn(0L);
when(dictionaryClient.getColors(anyString())).thenReturn(new Colors("dossierTemplateId",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc"));
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc"));
var type1 = new Type();
type1.setType("type1");
@ -101,7 +101,9 @@ public class DictionaryServiceTest {
type1.setVersion(1L);
type1.setHexColor("#cccccc");
type1.setHasDictionary(true);
type1.setEntries(Stream.of("a", "b", "c").map(t -> new DictionaryEntry(1, t, 1L, false, "type1")).collect(Collectors.toList()));
type1.setEntries(Stream.of("a", "b", "c")
.map(t -> new DictionaryEntry(1, t, 1L, false, "type1"))
.collect(Collectors.toList()));
var type2 = new Type();
type2.setType("type2");
@ -109,7 +111,9 @@ public class DictionaryServiceTest {
type2.setVersion(1L);
type2.setHexColor("#cccccc");
type2.setHasDictionary(true);
type2.setEntries(Stream.of("d", "e", "f").map(t -> new DictionaryEntry(1, t, 1L, false, "type2")).collect(Collectors.toList()));
type2.setEntries(Stream.of("d", "e", "f")
.map(t -> new DictionaryEntry(1, t, 1L, false, "type2"))
.collect(Collectors.toList()));
var type1Updated = new Type();
type1Updated.setType("type1");
@ -117,7 +121,9 @@ public class DictionaryServiceTest {
type1Updated.setVersion(2L);
type1Updated.setHexColor("#cccccc");
type1Updated.setHasDictionary(true);
type1Updated.setEntries(Stream.of("z", "q", "x").map(t -> new DictionaryEntry(1, t, 2L, false, "type1")).collect(Collectors.toList()));
type1Updated.setEntries(Stream.of("z", "q", "x")
.map(t -> new DictionaryEntry(1, t, 2L, false, "type1"))
.collect(Collectors.toList()));
when(dictionaryClient.getAllTypesForDossierTemplate(anyString(), anyBoolean())).thenReturn(List.of(type1, type2));
when(dictionaryClient.getDictionaryForType("type1", null)).thenReturn(type1);
@ -174,18 +180,18 @@ public class DictionaryServiceTest {
when(dictionaryClient.getVersion(anyString())).thenReturn(0L);
when(dictionaryClient.getColors(anyString())).thenReturn(new Colors("dtId",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc"));
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc",
"#cccccc"));
var type = "type";
var dtType = new Type();
@ -195,7 +201,9 @@ public class DictionaryServiceTest {
dtType.setHexColor("#cccccc");
dtType.setHasDictionary(true);
dtType.setDossierTemplateId("dtId");
List<DictionaryEntry> dossierTemplateEntries = Stream.of("aa", "bb").map(t -> new DictionaryEntry(1, t, 1L, false, "type1")).collect(Collectors.toList());
List<DictionaryEntry> dossierTemplateEntries = Stream.of("aa", "bb")
.map(t -> new DictionaryEntry(1, t, 1L, false, "type1"))
.collect(Collectors.toList());
dossierTemplateEntries.add(new DictionaryEntry(1, "cc", 2L, false, "type1"));
assertThat(dossierTemplateEntries.size()).isEqualTo(3);
dtType.setEntries(dossierTemplateEntries);
@ -208,7 +216,9 @@ public class DictionaryServiceTest {
dossierType.setHasDictionary(true);
dossierType.setDossierTemplateId("dtId");
dossierType.setDossierId("dossierId");
List<DictionaryEntry> dossierEntries = Stream.of("aa", "bb").map(t -> new DictionaryEntry(1, t, 2L, true, "dossierType")).collect(Collectors.toList());
List<DictionaryEntry> dossierEntries = Stream.of("aa", "bb")
.map(t -> new DictionaryEntry(1, t, 2L, true, "dossierType"))
.collect(Collectors.toList());
dossierEntries.add(new DictionaryEntry(1, "dd", 1L, false, "dossierType"));
assertThat(dossierEntries.size()).isEqualTo(3);
dossierType.setEntries(dossierEntries);
@ -224,15 +234,17 @@ public class DictionaryServiceTest {
dictionaryService.updateDictionary("dtId", "dossierId");
var dict = dictionaryService.getDeepCopyDictionary("dtId", "dossierId");
assertThat(dict.getDictionaryModels().size()).isEqualTo(1);
var dictModel = dict.getDictionaryModels().get(0);
var dictModel = dict.getDictionaryModels()
.get(0);
assertThat(dictModel.getType()).isEqualTo(type);
assertThat(dictModel.getEntries().size()).isEqualTo(4);
dictModel.getEntries().forEach(entry -> {
switch (entry.getValue()) {
case "aa", "dd", "bb" -> assertThat(entry.getTypeId()).isEqualTo(dossierType.getTypeId());
case "cc" -> assertThat(entry.getTypeId()).isEqualTo(dtType.getTypeId());
}
});
dictModel.getEntries()
.forEach(entry -> {
switch (entry.getValue()) {
case "aa", "dd", "bb" -> assertThat(entry.getTypeId()).isEqualTo(dossierType.getTypeId());
case "cc" -> assertThat(entry.getTypeId()).isEqualTo(dtType.getTypeId());
}
});
}
}

View File

@ -8,6 +8,7 @@ import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@ -55,9 +56,11 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf");
ClassPathResource importedRedactionClasspathResource = new ClassPathResource("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactionClasspathResource.getInputStream());
ClassPathResource importedRedactionClasspathResource = new ClassPathResource(
"files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
importedRedactionClasspathResource.getInputStream());
// AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).pdf",
// "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).TABLES.json");
@ -86,7 +89,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
// FIXME TableNodeFactory: 36, why has table no rows/cols here.
AnalyzeRequest request = prepareStorage("files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.pdf",
"files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json");
"files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json");
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
@ -166,15 +169,15 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);

View File

@ -87,15 +87,15 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);
@ -122,6 +122,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
assertThat(recommendations).containsExactlyInAnyOrder("Michael N.", "Funnarie B.", "Feuer A.");
}
@Test
public void acceptanceTests() throws IOException {
@ -133,8 +134,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
System.out.println("Finished analysis");
EntityLog entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow();
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst().orElseThrow();
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
@ -146,8 +149,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow();
var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst().orElseThrow();
var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst()
.orElseThrow();
assertEquals(EntryState.APPLIED, asyaLyon2.getState());
@ -168,13 +173,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
.stream()
.filter(entry -> entry.getType().equals(type))
.filter(entry -> entry.getValue().equals(value))
.filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0)));
.filter(entry -> entry.getContainingNodeId()
.get(0).equals(sectionNumber.get(0)));
}
private static Stream<EntityLogEntry> findEntityByTypeAndValue(EntityLog redactionLog, String type, String value) {
return redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getType().equals(type)).filter(entry -> entry.getValue().equals(value));
return redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getType().equals(type))
.filter(entry -> entry.getValue().equals(value));
}
@ -201,7 +210,9 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertEquals(EntryState.IGNORED,
findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY)).findFirst().get().getState());
findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY))
.findFirst()
.get().getState());
}

View File

@ -78,15 +78,15 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);
@ -128,7 +128,8 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
assertThat(entityLog.getEntityLogEntry().size()).isEqualTo(1);
EntityLogEntry redactionLogEntry = entityLog.getEntityLogEntry().get(0);
EntityLogEntry redactionLogEntry = entityLog.getEntityLogEntry()
.get(0);
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
assertThat(redactionLogEntry.getValue()).isEqualTo(entryAuthorAndPIIDictionary);
@ -164,7 +165,8 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
assertThat(redactionLog.getEntityLogEntry().size()).isEqualTo(1);
EntityLogEntry redactionLogEntry = redactionLog.getEntityLogEntry().get(0);
EntityLogEntry redactionLogEntry = redactionLog.getEntityLogEntry()
.get(0);
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
assertThat(redactionLogEntry.getValue()).isEqualTo(entryAuthorDictionary);

View File

@ -136,99 +136,99 @@ public class RulesTest {
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, List<String>> falsePositive = new HashMap<>();
private static final Map<String, String> typeColorMap = Map.ofEntries(entry(VERTEBRATE, "#ff85f7"),
entry(ADDRESS, "#ffe187"),
entry(AUTHOR, "#ffe187"),
entry(SPONSOR, "#85ebff"),
entry(NO_REDACTION_INDICATOR, "#be85ff"),
entry(REDACTION_INDICATOR, "#caff85"),
entry(HINT_ONLY, "#abc0c4"),
entry(MUST_REDACT, "#fab4c0"),
entry(PUBLISHED_INFORMATION, "#85ebff"),
entry(TEST_METHOD, "#91fae8"),
entry(PII, "#66ccff"),
entry(PURITY, "#ffe187"),
entry(IMAGE, "#fcc5fb"),
entry(OCR, "#fcc5fb"),
entry(LOGO, "#ffe187"),
entry(FORMULA, "#ffe187"),
entry(SIGNATURE, "#ffe187"),
entry(IMPORTED_REDACTION, "#fcfbe6"));
entry(ADDRESS, "#ffe187"),
entry(AUTHOR, "#ffe187"),
entry(SPONSOR, "#85ebff"),
entry(NO_REDACTION_INDICATOR, "#be85ff"),
entry(REDACTION_INDICATOR, "#caff85"),
entry(HINT_ONLY, "#abc0c4"),
entry(MUST_REDACT, "#fab4c0"),
entry(PUBLISHED_INFORMATION, "#85ebff"),
entry(TEST_METHOD, "#91fae8"),
entry(PII, "#66ccff"),
entry(PURITY, "#ffe187"),
entry(IMAGE, "#fcc5fb"),
entry(OCR, "#fcc5fb"),
entry(LOGO, "#ffe187"),
entry(FORMULA, "#ffe187"),
entry(SIGNATURE, "#ffe187"),
entry(IMPORTED_REDACTION, "#fcfbe6"));
private static final Map<String, Boolean> hintTypeMap = Map.ofEntries(entry(VERTEBRATE, true),
entry(ADDRESS, false),
entry(AUTHOR, false),
entry(SPONSOR, false),
entry(NO_REDACTION_INDICATOR, true),
entry(REDACTION_INDICATOR, true),
entry(HINT_ONLY, true),
entry(MUST_REDACT, true),
entry(PUBLISHED_INFORMATION, true),
entry(TEST_METHOD, true),
entry(PII, false),
entry(PURITY, false),
entry(IMAGE, true),
entry(OCR, true),
entry(FORMULA, false),
entry(LOGO, false),
entry(SIGNATURE, false),
entry(DOSSIER_REDACTIONS, false),
entry(IMPORTED_REDACTION, false));
entry(ADDRESS, false),
entry(AUTHOR, false),
entry(SPONSOR, false),
entry(NO_REDACTION_INDICATOR, true),
entry(REDACTION_INDICATOR, true),
entry(HINT_ONLY, true),
entry(MUST_REDACT, true),
entry(PUBLISHED_INFORMATION, true),
entry(TEST_METHOD, true),
entry(PII, false),
entry(PURITY, false),
entry(IMAGE, true),
entry(OCR, true),
entry(FORMULA, false),
entry(LOGO, false),
entry(SIGNATURE, false),
entry(DOSSIER_REDACTIONS, false),
entry(IMPORTED_REDACTION, false));
private static final Map<String, Boolean> caseInSensitiveMap = Map.ofEntries(entry(VERTEBRATE, true),
entry(ADDRESS, false),
entry(AUTHOR, false),
entry(SPONSOR, false),
entry(NO_REDACTION_INDICATOR, true),
entry(REDACTION_INDICATOR, true),
entry(HINT_ONLY, true),
entry(MUST_REDACT, true),
entry(PUBLISHED_INFORMATION, true),
entry(TEST_METHOD, false),
entry(PII, false),
entry(PURITY, false),
entry(IMAGE, true),
entry(OCR, true),
entry(SIGNATURE, true),
entry(LOGO, true),
entry(FORMULA, true),
entry(DOSSIER_REDACTIONS, false),
entry(IMPORTED_REDACTION, false));
entry(ADDRESS, false),
entry(AUTHOR, false),
entry(SPONSOR, false),
entry(NO_REDACTION_INDICATOR, true),
entry(REDACTION_INDICATOR, true),
entry(HINT_ONLY, true),
entry(MUST_REDACT, true),
entry(PUBLISHED_INFORMATION, true),
entry(TEST_METHOD, false),
entry(PII, false),
entry(PURITY, false),
entry(IMAGE, true),
entry(OCR, true),
entry(SIGNATURE, true),
entry(LOGO, true),
entry(FORMULA, true),
entry(DOSSIER_REDACTIONS, false),
entry(IMPORTED_REDACTION, false));
private static final Map<String, Boolean> recommendationTypeMap = Map.ofEntries(entry(VERTEBRATE, false),
entry(ADDRESS, false),
entry(AUTHOR, false),
entry(SPONSOR, false),
entry(NO_REDACTION_INDICATOR, false),
entry(REDACTION_INDICATOR, false),
entry(HINT_ONLY, false),
entry(MUST_REDACT, false),
entry(PUBLISHED_INFORMATION, false),
entry(TEST_METHOD, false),
entry(PII, false),
entry(PURITY, false),
entry(IMAGE, false),
entry(OCR, false),
entry(FORMULA, false),
entry(SIGNATURE, false),
entry(LOGO, false),
entry(DOSSIER_REDACTIONS, false),
entry(IMPORTED_REDACTION, false));
entry(ADDRESS, false),
entry(AUTHOR, false),
entry(SPONSOR, false),
entry(NO_REDACTION_INDICATOR, false),
entry(REDACTION_INDICATOR, false),
entry(HINT_ONLY, false),
entry(MUST_REDACT, false),
entry(PUBLISHED_INFORMATION, false),
entry(TEST_METHOD, false),
entry(PII, false),
entry(PURITY, false),
entry(IMAGE, false),
entry(OCR, false),
entry(FORMULA, false),
entry(SIGNATURE, false),
entry(LOGO, false),
entry(DOSSIER_REDACTIONS, false),
entry(IMPORTED_REDACTION, false));
private static final Map<String, Integer> rankTypeMap = Map.ofEntries(entry(PURITY, 155),
entry(PII, 150),
entry(ADDRESS, 140),
entry(AUTHOR, 130),
entry(SPONSOR, 120),
entry(VERTEBRATE, 110),
entry(MUST_REDACT, 100),
entry(REDACTION_INDICATOR, 90),
entry(NO_REDACTION_INDICATOR, 80),
entry(PUBLISHED_INFORMATION, 70),
entry(TEST_METHOD, 60),
entry(HINT_ONLY, 50),
entry(IMAGE, 30),
entry(OCR, 29),
entry(LOGO, 28),
entry(SIGNATURE, 27),
entry(FORMULA, 26),
entry(DOSSIER_REDACTIONS, 200),
entry(IMPORTED_REDACTION, 200));
entry(PII, 150),
entry(ADDRESS, 140),
entry(AUTHOR, 130),
entry(SPONSOR, 120),
entry(VERTEBRATE, 110),
entry(MUST_REDACT, 100),
entry(REDACTION_INDICATOR, 90),
entry(NO_REDACTION_INDICATOR, 80),
entry(PUBLISHED_INFORMATION, 70),
entry(TEST_METHOD, 60),
entry(HINT_ONLY, 50),
entry(IMAGE, 30),
entry(OCR, 29),
entry(LOGO, 28),
entry(SIGNATURE, 27),
entry(FORMULA, 26),
entry(DOSSIER_REDACTIONS, 200),
entry(IMPORTED_REDACTION, 200));
private final Colors colors = new Colors();
@Autowired
@ -273,15 +273,15 @@ public class RulesTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
.build()));
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
.build()));
mockDictionaryCalls(null);
mockDictionaryCalls(0L);
@ -372,10 +372,10 @@ public class RulesTest {
log.warn("WARNING: {} files from {} failed", failedFiles.size(), fileSize);
for (String fileName : failedFiles.keySet()) {
log.warn(" - '{}' failed with Error: {} See line {} in {}",
fileName,
failedFiles.get(fileName),
failedFiles.get(fileName).getStackTrace()[0].getLineNumber(),
failedFiles.get(fileName).getStackTrace()[0].getClassName());
fileName,
failedFiles.get(fileName),
failedFiles.get(fileName).getStackTrace()[0].getLineNumber(),
failedFiles.get(fileName).getStackTrace()[0].getClassName());
}
}
@ -442,9 +442,13 @@ public class RulesTest {
assertThat(entityLog.getLegalBasisVersion()).isEqualTo(savedRedactionLog.getLegalBasisVersion());
assertThat(entityLog.getEntityLogEntry()
.stream().filter(r -> !r.getEntryType().equals(EntryType.FALSE_POSITIVE)).filter(r -> !r.getEntryType().equals(EntryType.FALSE_RECOMMENDATION))
.collect(Collectors.toSet())
.size()).isEqualTo(savedRedactionLog.getRedactionLogEntry().stream().filter(r -> !r.isFalsePositive()).collect(Collectors.toSet()).size());
.stream()
.filter(r -> !r.getEntryType().equals(EntryType.FALSE_POSITIVE))
.filter(r -> !r.getEntryType().equals(EntryType.FALSE_RECOMMENDATION))
.collect(Collectors.toSet()).size()).isEqualTo(savedRedactionLog.getRedactionLogEntry()
.stream()
.filter(r -> !r.isFalsePositive())
.collect(Collectors.toSet()).size());
assertThat(entityLog.getLegalBasis().size()).isEqualTo(savedRedactionLog.getLegalBasis().size());
for (EntityLogLegalBasis redactionLegalBasis : entityLog.getLegalBasis()) {
@ -458,7 +462,10 @@ public class RulesTest {
}
for (EntityLogEntry redactionLogEntry : entityLog.getEntityLogEntry()) {
var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry().stream().filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId())).findFirst();
var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry()
.stream()
.filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId()))
.findFirst();
assertThat(savedRedactionLogEntry).isPresent();
assertThat(savedRedactionLogEntry.get().getId()).isEqualTo(redactionLogEntry.getId());
assertThat(savedRedactionLogEntry.get().getType()).isEqualTo(redactionLogEntry.getType());
@ -473,7 +480,8 @@ public class RulesTest {
assertThat(savedRedactionLogEntry.get().isFalsePositive()).isEqualTo(redactionLogEntry.getEntryType().equals(EntryType.FALSE_POSITIVE));
assertThat(savedRedactionLogEntry.get().getSection()).isEqualTo(redactionLogEntry.getSection());
assertThat(savedRedactionLogEntry.get().getColor()).isEqualTo(redactionLogEntry.getColor());
assertThat(savedRedactionLogEntry.get().getSectionNumber()).isEqualTo(redactionLogEntry.getContainingNodeId().get(0));
assertThat(savedRedactionLogEntry.get().getSectionNumber()).isEqualTo(redactionLogEntry.getContainingNodeId()
.get(0));
assertThat(savedRedactionLogEntry.get().getTextBefore()).isEqualTo(redactionLogEntry.getTextBefore());
assertThat(savedRedactionLogEntry.get().getTextAfter()).isEqualTo(redactionLogEntry.getTextAfter());
assertThat(savedRedactionLogEntry.get().getStartOffset()).isEqualTo(redactionLogEntry.getStartOffset());
@ -485,8 +493,7 @@ public class RulesTest {
assertThat(savedRedactionLogEntry.get().isExcluded()).isEqualTo(redactionLogEntry.isExcluded());
for (Position position : redactionLogEntry.getPositions()) {
var savedRectangle = savedRedactionLogEntry.get()
.getPositions()
var savedRectangle = savedRedactionLogEntry.get().getPositions()
.stream()
.filter(r -> r.getPage() == position.getPageNumber())
.filter(r -> r.getTopLeft().getX() == position.x())
@ -498,17 +505,16 @@ public class RulesTest {
}
for (Change change : redactionLogEntry.getChanges()) {
var savedChange = savedRedactionLogEntry.get()
.getChanges()
var savedChange = savedRedactionLogEntry.get().getChanges()
.stream()
.filter(c -> c.getAnalysisNumber() == change.getAnalysisNumber()).filter(c -> c.getType().name().equals(change.getType().name()))
.filter(c -> c.getAnalysisNumber() == change.getAnalysisNumber())
.filter(c -> c.getType().name().equals(change.getType().name()))
.findFirst();
assertThat(savedChange).isPresent();
}
for (ManualChange manualChange : redactionLogEntry.getManualChanges()) {
var savedManualChange = savedRedactionLogEntry.get()
.getManualChanges()
var savedManualChange = savedRedactionLogEntry.get().getManualChanges()
.stream()
.filter(m -> m.getManualRedactionType().name().equals(manualChange.getManualRedactionType().name()))
.filter(m -> m.getUserId().equalsIgnoreCase(manualChange.getUserId()))
@ -517,7 +523,8 @@ public class RulesTest {
assertThat(savedManualChange).isPresent();
}
assertThat(savedRedactionLogEntry.get().getEngines()).containsExactlyInAnyOrder(redactionLogEntry.getEngines().toArray(Engine[]::new));
assertThat(savedRedactionLogEntry.get().getEngines()).containsExactlyInAnyOrder(redactionLogEntry.getEngines()
.toArray(Engine[]::new));
assertThat(savedRedactionLogEntry.get().getReference()).containsAll(redactionLogEntry.getReference());
assertThat(savedRedactionLogEntry.get().getImportedRedactionIntersections()).containsAll(redactionLogEntry.getImportedRedactionIntersections());
@ -622,8 +629,8 @@ public class RulesTest {
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
}
@ -647,8 +654,8 @@ public class RulesTest {
.build();
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
return request;
@ -659,45 +666,102 @@ public class RulesTest {
private void loadDictionaryForTest() {
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/hint_only.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/published_information.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/test_method.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/PII.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/purity.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -736,12 +800,12 @@ public class RulesTest {
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR,
false));
false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION,
false));
false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false));
@ -809,7 +873,10 @@ public class RulesTest {
protected LayoutParsingFinishedEvent analyzeDocumentStructure(LayoutParsingType layoutParsingType) {
return layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(LayoutParsingRequestProvider.build(layoutParsingType,
AnalyzeRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()));
AnalyzeRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build()));
}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.annotate;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;

View File

@ -79,8 +79,7 @@ public class AnnotationService {
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
pdDocument.save(byteArrayOutputStream);
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray())
.build();
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray()).build();
}
}
}

View File

@ -13,10 +13,7 @@ public class TextEntityTest {
@Test
public void testMatchedRule() {
PrecursorEntity entity = PrecursorEntity.builder()
.type("PII")
.entityType(EntityType.ENTITY)
.build();
PrecursorEntity entity = PrecursorEntity.builder().type("PII").entityType(EntityType.ENTITY).build();
entity.skip("CBI.1.0", "");
entity.skip("CBI.2.0", "");
entity.skip("CBI.3.0", "");
@ -27,14 +24,10 @@ public class TextEntityTest {
}
@Test
public void testMatchedRuleWithNonsense() {
PrecursorEntity entity = PrecursorEntity.builder()
.type("PII")
.entityType(EntityType.ENTITY)
.build();
PrecursorEntity entity = PrecursorEntity.builder().type("PII").entityType(EntityType.ENTITY).build();
assertThrows(IllegalArgumentException.class, () -> {
entity.skip("", "");
});

View File

@ -49,19 +49,26 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
}
@Test
public void assertEntitiesAreDuplicatedWithTheirTableCell() {
Document document = buildGraph("files/Minimal Examples/Meto1_Page22.pdf");
List<TextEntity> entities = entityCreationService.byString("Surface Water", "test", EntityType.ENTITY, document).toList();
List<TextEntity> entities = entityCreationService.byString("Surface Water", "test", EntityType.ENTITY, document)
.toList();
assertEquals(3, entities.size());
assertEquals(1, entities.stream().distinct().count());
assertEquals(1,
entities.stream()
.distinct()
.count());
assertEquals(2, entities.get(0).getDuplicateTextRanges().size());
var node = entities.get(0).getDeepestFullyContainingNode();
assertTrue(node.getTextRange().contains(entities.get(0).getTextRange()));
assertTrue(entities.get(0).getDuplicateTextRanges().stream().allMatch(tr -> node.getTextRange().contains(tr)));
assertTrue(entities.get(0).getDuplicateTextRanges()
.stream()
.allMatch(tr -> node.getTextRange().contains(tr)));
}
@ -84,8 +91,10 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
Document document = buildGraph("files/new/crafted document.pdf");
String type = "CBI_author";
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document)
.isPresent());
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document)
.isPresent());
assertEquals(1, document.getEntities().size());
verify(kieSession, times(1)).insert(any(TextEntity.class));
}
@ -114,7 +123,7 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
assertEquals("s Donut ←", textEntity.getTextAfter());
assertEquals(searchTerm, textEntity.getValue());
assertEquals("Rule 5: Do not redact genitive CBI_authors (Entries based on Dict) ",
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
@ -181,11 +190,18 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
.allEntriesInOrder()//
.filter(entry -> entry.getType().equals(NodeType.TABLE))//
.map(DocumentTree.Entry::getNode)//
.findFirst().orElseThrow();
.findFirst()
.orElseThrow();
assertEquals(5, table.getNumberOfCols());
assertEquals(4, table.getNumberOfRows());
assertEquals(5, table.streamHeaders().toList().size());
CharSequence firstHeader = table.streamHeadersForCell(1, 1).map(TableCell::getTextBlock).map(TextBlock::getSearchText).findFirst().orElseThrow();
assertEquals(5,
table.streamHeaders()
.toList().size());
CharSequence firstHeader = table.streamHeadersForCell(1, 1)
.map(TableCell::getTextBlock)
.map(TextBlock::getSearchText)
.findFirst()
.orElseThrow();
assertEquals("Author(s)", firstHeader.toString().stripTrailing());
}
@ -207,17 +223,23 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
public void assertTableStructureMetolachlor() {
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
Table table = (Table) document.getDocumentTree()
.allEntriesInOrder()
.filter(entry -> entry.getNode().getPages().stream().anyMatch(page -> page.getNumber() == 22))
Table table = (Table) document.getDocumentTree().allEntriesInOrder()
.filter(entry -> entry.getNode().getPages()
.stream()
.anyMatch(page -> page.getNumber() == 22))
.filter(entry -> entry.getType().equals(NodeType.TABLE))
.map(DocumentTree.Entry::getNode)
.findFirst()
.orElseThrow();
assertEquals(5, table.getNumberOfCols());
assertEquals(14, table.getNumberOfRows());
assertEquals(10, table.streamHeaders().toList().size());
List<String> twoHeaders = table.streamHeadersForCell(2, 1).map(TableCell::getTextBlock).map(TextBlock::getSearchText).toList();
assertEquals(10,
table.streamHeaders()
.toList().size());
List<String> twoHeaders = table.streamHeadersForCell(2, 1)
.map(TableCell::getTextBlock)
.map(TextBlock::getSearchText)
.toList();
assertEquals(2, twoHeaders.size());
assertEquals("Component of residue definition: S-Metolachlor", twoHeaders.get(0).stripTrailing());
assertEquals("Method type", twoHeaders.get(1).stripTrailing());
@ -233,12 +255,13 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
assertEquals("except Cranberry; Vegetable, ", textEntity.getTextBefore());
assertEquals(", Group 9;", textEntity.getTextAfter());
assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ",
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(searchTerm, textEntity.getValue());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
assertTrue(textEntity.getPages()
.stream()
.allMatch(pageNode -> pageNode.getNumber() == 10));
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
@ -262,11 +285,13 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
assertEquals("2.6.1 Summary of ", textEntity.getTextBefore());
assertEquals(" and excretion in", textEntity.getTextAfter());
assertEquals("2.6.1 Summary of absorption, distribution, metabolism and excretion in mammals ",
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(searchTerm, textEntity.getValue());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(4, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
assertTrue(textEntity.getPages()
.stream()
.allMatch(pageNode -> pageNode.getNumber() == 33));
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
@ -285,8 +310,10 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
assertEquals(searchTerm, textEntity.getValue());
assertEquals(4, textEntity.getIntersectingNodes().size());
assertEquals("Table 2.7-1: List of substances and metabolites and related structural formula ",
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54));
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertTrue(textEntity.getPages()
.stream()
.allMatch(pageNode -> pageNode.getNumber() == 54));
assertEquals(26, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode());
@ -298,7 +325,8 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
// this might fail, if an entity with the same name exists twice in the deepest containing node
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, TextEntity textEntity) {
List<Integer> paragraphStart = textEntity.getIntersectingNodes().stream()//
List<Integer> paragraphStart = textEntity.getIntersectingNodes()
.stream()//
.map(SemanticNode::getTextBlock)//
.map(textBlock -> textBlock.indexOf(searchTerm, textEntity.getDeepestFullyContainingNode().getTextRange().start()))//
.toList();
@ -316,14 +344,23 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
TextRange textRange = new TextRange(start, start + searchTerm.length());
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY, document);
entityCreationService.addEntityToGraph(textEntity, document);
Page pageNode = document.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
Page pageNode = document.getPages()
.stream()
.filter(page -> page.getNumber() == pageNumber)
.findFirst()
.orElseThrow();
assertEquals(textEntity.getValue(), searchTerm);
assertTrue(pageNode.getEntities().contains(textEntity));
assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(textEntity)));
assertTrue(document.getPages()
.stream()
.filter(page -> page != pageNode)
.noneMatch(page -> page.getEntities().contains(textEntity)));
assertTrue(textEntity.getPages().contains(pageNode));
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
assertTrue(textEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(textEntity)));
assertTrue(textEntity.getIntersectingNodes()
.stream()
.allMatch(node -> node.getEntities().contains(textEntity)));
}
}

View File

@ -112,15 +112,15 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);
@ -155,11 +155,11 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
long droolsStart = System.currentTimeMillis();
List<FileAttribute> fileAttributes = entityDroolsExecutionService.executeRules(kieContainer,
document,
dictionary,
Collections.emptyList(),
new ManualRedactions(),
new NerEntities());
document,
dictionary,
Collections.emptyList(),
new ManualRedactions(),
new NerEntities());
System.out.printf("Firing rules took %d ms\n", System.currentTimeMillis() - droolsStart);
System.out.printf("Total time %d ms\n", System.currentTimeMillis() - dictionarySearchStart);
@ -181,7 +181,8 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
float durationMillis = ((float) (System.currentTimeMillis() - start));
System.out.printf("%d calls of buildTextBlock() on document took %f s, average is %f ms\n", n, durationMillis / 1000, durationMillis / n);
Section section = document.getMainSections().get(8);
Section section = document.getMainSections()
.get(8);
start = System.currentTimeMillis();
for (int i = 0; i < n; i++) {
section.getTextBlock();
@ -245,11 +246,19 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
for (TextEntity entity : document.getEntities()) {
var foundEntity = foundEntities.stream().filter(f -> f.getId().equals(entity.getId())).findFirst().get();
var foundEntity = foundEntities.stream()
.filter(f -> f.getId().equals(entity.getId()))
.findFirst()
.get();
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
}
assert document.getEntities().stream().mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
assert foundEntities.stream().map(TextEntity::getId).distinct().count() == document.getEntities().size();
assert document.getEntities()
.stream()
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
assert foundEntities.stream()
.map(TextEntity::getId)
.distinct()
.count() == document.getEntities().size();
drawAllEntities(filename, document);
}
@ -265,9 +274,11 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
.stream()
.filter(entityNode -> !entityNode.removed())
.filter(TextEntity::applied)
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
.stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
.flatMap(entityPosition -> entityPosition.getRectanglePerLine()
.stream())
.toList();
PdfVisualisationUtility.Options options = PdfVisualisationUtility.Options.builder().strokeColor(Color.BLACK).stroke(true).build();
@ -279,9 +290,11 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
.stream()
.filter(entityNode -> !entityNode.removed())
.filter(entityNode -> !entityNode.applied())
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
.stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
.flatMap(entityPosition -> entityPosition.getRectanglePerLine()
.stream())
.toList();
PdfVisualisationUtility.Options options = PdfVisualisationUtility.Options.builder().strokeColor(Color.BLUE).stroke(true).build();

View File

@ -15,8 +15,17 @@ public class DocumentTableIntegrationTest extends BuildDocumentIntegrationTest {
public void testAllTableCellAccessesCorrect() {
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06");
Page pageFive = document.getPages().stream().filter(pageNode -> pageNode.getNumber() == 5).findFirst().get();
Table table = pageFive.getMainBody().stream().filter(semanticNode -> semanticNode instanceof Table).map(semanticNode -> (Table) semanticNode).findFirst().get();
Page pageFive = document.getPages()
.stream()
.filter(pageNode -> pageNode.getNumber() == 5)
.findFirst()
.get();
Table table = pageFive.getMainBody()
.stream()
.filter(semanticNode -> semanticNode instanceof Table)
.map(semanticNode -> (Table) semanticNode)
.findFirst()
.get();
for (int row = 0; row < table.getNumberOfRows(); row++) {
for (int col = 0; col < table.getNumberOfCols(); col++) {

View File

@ -83,8 +83,8 @@ public class DocumentVisualizationIntegrationTest extends BuildDocumentIntegrati
PdfVisualisationUtility.drawDocumentGraph(pdDocument, document);
PdfVisualisationUtility.drawTextBlock(pdDocument,
textBlock,
PdfVisualisationUtility.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
textBlock,
PdfVisualisationUtility.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
File outputFile = new File(tmpFileName);
pdDocument.save(outputFile);
}

View File

@ -27,7 +27,8 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest {
SearchImplementation searchImplementation = new SearchImplementation(List.of("mydossierredaction"), true);
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList();
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document)
.toList();
assertEquals(2, entities.size());
}

View File

@ -79,6 +79,7 @@ class DroolsSyntaxValidationServiceTest {
assertTrue(droolsSyntaxValidation.isCompiled());
}
@Test
@SneakyThrows
void testRulesWithAddedImports() {
@ -96,6 +97,7 @@ class DroolsSyntaxValidationServiceTest {
assertTrue(droolsSyntaxValidation.isCompiled());
}
@Test
@SneakyThrows
void testDocumineRules() {

View File

@ -26,7 +26,9 @@ public class DroolsUpToDateTest {
public void assertAllRuleFilesAreUpToDate() {
Path droolsPath = new ClassPathResource("drools").getFile().toPath();
Files.walk(droolsPath).filter(DroolsUpToDateTest::isEntityRuleFile).forEach(this::validateFile);
Files.walk(droolsPath)
.filter(DroolsUpToDateTest::isEntityRuleFile)
.forEach(this::validateFile);
}

View File

@ -127,15 +127,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);
@ -155,29 +155,39 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
String testEntityValue1 = "Desiree";
String testEntityValue2 = "Melanie";
EntityLog redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());
assertEquals(2,
redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getValue().equals(testEntityValue1))
.count());
assertEquals(2,
redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getValue().equals(testEntityValue2))
.count());
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get();
entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document)
.findFirst()
.get();
String idToResize = redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getValue().equals(testEntityValue1))
.max(Comparator.comparingInt(EntityLogEntry::getStartOffset))
.get()
.getId();
.get().getId();
ManualRedactions manualRedactions = new ManualRedactions();
manualRedactions.getResizeRedactions().add(ManualResizeRedaction.builder()
.annotationId(idToResize)
.value(expandedEntityKeyword)
.positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(),
Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build()))
.addToAllDossiers(false)
.updateDictionary(false)
.requestDate(OffsetDateTime.now())
.build());
manualRedactions.getResizeRedactions()
.add(ManualResizeRedaction.builder()
.annotationId(idToResize)
.value(expandedEntityKeyword)
.positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(),
Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build()))
.addToAllDossiers(false)
.updateDictionary(false)
.requestDate(OffsetDateTime.now())
.build());
request.setManualRedactions(manualRedactions);
analyzeService.reanalyze(request);
@ -188,21 +198,32 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
fileOutputStream.write(annotateResponse.getDocument());
}
EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get();
EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getValue().equals(expandedEntityKeyword))
.findFirst()
.get();
assertEquals(idToResize, resizedEntry.getId());
assertEquals(1, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
assertEquals(1,
redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED)).count());
redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getValue().equals(testEntityValue1))
.count());
assertEquals(1,
redactionLog.getEntityLogEntry()
.stream()
.filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED))
.count());
}
private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(),
(float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
(float) rectangle2D.getWidth(),
-(float) rectangle2D.getHeight(),
pageNumber);
(float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
(float) rectangle2D.getWidth(),
-(float) rectangle2D.getHeight(),
pageNumber);
}
@ -219,10 +240,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build()));
manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder()
.annotationId("675eba69b0c2917de55462c817adaa05")
.fileId("fileId")
.legalBasis("Something")
.build()));
.annotationId("675eba69b0c2917de55462c817adaa05")
.fileId("fileId")
.legalBasis("Something")
.build()));
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
manualRedactionEntry.setAnnotationId(manualAddId);
@ -232,7 +253,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
manualRedactionEntry.setValue("O'Loughlin C.K.");
manualRedactionEntry.setReason("Manual Redaction");
manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
AnalyzeRequest request = uploadFileToStorage(pdfFile);
request.setManualRedactions(manualRedactions);
@ -242,11 +263,11 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build()));
manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder()
.annotationId("675eba69b0c2917de55462c817adaa05")
.fileId("fileId")
.legalBasis("Manual Legal Basis Change")
.requestDate(OffsetDateTime.now())
.build())));
.annotationId("675eba69b0c2917de55462c817adaa05")
.fileId("fileId")
.legalBasis("Manual Legal Basis Change")
.requestDate(OffsetDateTime.now())
.build())));
analyzeService.reanalyze(request);
@ -295,7 +316,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
long end = System.currentTimeMillis();
var optionalEntry = redactionLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findAny();
var optionalEntry = redactionLog.getEntityLogEntry()
.stream()
.filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))
.findAny();
assertTrue(optionalEntry.isPresent());
assertEquals(2, optionalEntry.get().getContainingNodeId().size()); // 2 is the depth of the table instead of the table cell
System.out.println("duration: " + (end - start));
@ -345,9 +369,9 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
EntityLog redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertFalse(redactionLog2.getEntityLogEntry()
.stream()
.filter(entry -> entry.getType().equals("published_information"))
.anyMatch(entry -> entry.getValue().equals("Oxford University Press")));
.stream()
.filter(entry -> entry.getType().equals("published_information"))
.anyMatch(entry -> entry.getValue().equals("Oxford University Press")));
var oxfordUniversityPressRecategorized = redactionLog2.getEntityLogEntry()
.stream()
@ -381,15 +405,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
String annotationId = "testAnnotationId";
manualRedactions.setEntriesToAdd(Set.of(ManualRedactionEntry.builder()
.annotationId(annotationId)
.requestDate(OffsetDateTime.now())
.type("manual")
.value("Expand to Hint Clarissas Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated")
.positions(List.of(//
new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), //
new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), //
new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) //
.build()));
.annotationId(annotationId)
.requestDate(OffsetDateTime.now())
.type("manual")
.value("Expand to Hint Clarissas Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated")
.positions(List.of(//
new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), //
new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), //
new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) //
.build()));
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
.annotationId(annotationId)
.requestDate(OffsetDateTime.now())

View File

@ -32,18 +32,31 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
public void manualResizeRedactionTest() {
Document document = buildGraph("files/new/crafted document");
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
.collect(Collectors.toUnmodifiableSet());
Set<TextEntity> biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document)
.collect(Collectors.toUnmodifiableSet());
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream()
.filter(e -> e.getPages()
.stream()
.anyMatch(p -> p.getNumber() == 1))
.findFirst()
.get();
TextEntity biggerEntity = biggerEntities.stream()
.filter(e -> e.getPages()
.stream()
.anyMatch(p -> p.getNumber() == 1))
.findFirst()
.get();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage()
.get(0).getId();
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
.annotationId(initialId)
.value(biggerEntity.getValue())
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0)))
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage()
.get(0)))
.requestDate(OffsetDateTime.now())
.updateDictionary(false)
.build();
@ -55,8 +68,13 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
assertTrue(Sets.difference(new HashSet<>(biggerEntity.getIntersectingNodes()), new HashSet<>(entity.getIntersectingNodes())).isEmpty());
assertEquals(biggerEntity.getPages(), entity.getPages());
assertEquals(biggerEntity.getValue(), entity.getValue());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine());
assertEquals(initialId,
entity.getPositionsOnPagePerPage()
.get(0).getId());
assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage()
.get(0).getRectanglePerLine(),
entity.getPositionsOnPagePerPage()
.get(0).getRectanglePerLine());
assertTrue(entity.resized());
}
@ -65,11 +83,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
public void manualForceRedactionTest() {
Document document = buildGraph("files/new/crafted document");
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
.collect(Collectors.toUnmodifiableSet());
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream()
.filter(e -> e.getPages()
.stream()
.anyMatch(p -> p.getNumber() == 1))
.findFirst()
.get();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage()
.get(0).getId();
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build();
doAnalysis(document, List.of(manualForceRedaction));
@ -78,8 +103,12 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertEquals("Something",
entity.getManualOverwrite().getLegalBasis()
.orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals(initialId,
entity.getPositionsOnPagePerPage()
.get(0).getId());
assertFalse(entity.removed());
assertTrue(entity.hasManualChanges());
assertTrue(entity.applied());
@ -90,17 +119,26 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
public void manualIDRemovalTest() {
Document document = buildGraph("files/new/crafted document");
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
.collect(Collectors.toUnmodifiableSet());
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream()
.filter(e -> e.getPages()
.stream()
.anyMatch(p -> p.getNumber() == 1))
.findFirst()
.get();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage()
.get(0).getId();
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build();
doAnalysis(document, List.of(idRemoval));
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertEquals(initialId,
entity.getPositionsOnPagePerPage()
.get(0).getId());
assertTrue(entity.ignored());
}
@ -109,11 +147,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
public void manualIDRemovalButAlsoForceRedactionTest() {
Document document = buildGraph("files/new/crafted document");
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
.collect(Collectors.toUnmodifiableSet());
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream()
.filter(e -> e.getPages()
.stream()
.anyMatch(p -> p.getNumber() == 1))
.findFirst()
.get();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage()
.get(0).getId();
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build();
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build();
@ -123,7 +168,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertEquals(initialId,
entity.getPositionsOnPagePerPage()
.get(0).getId());
assertFalse(entity.removed());
assertFalse(entity.ignored());
}
@ -131,7 +178,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
private void assertRectanglesAlmostEqual(Collection<Rectangle2D> rects1, Collection<Rectangle2D> rects2) {
if (rects1.stream().allMatch(rect1 -> rects2.stream().anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) {
if (rects1.stream()
.allMatch(rect1 -> rects2.stream()
.anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) {
return;
}
// use this for nice formatting of error message
@ -143,15 +192,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
double tolerance = 1e-1;
return Math.abs(r1.getX() - r2.getX()) < tolerance &&//
Math.abs(r1.getY() - r2.getY()) < tolerance &&//
Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&//
Math.abs(r1.getHeight() - r2.getHeight()) < tolerance;
Math.abs(r1.getY() - r2.getY()) < tolerance &&//
Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&//
Math.abs(r1.getHeight() - r2.getHeight()) < tolerance;
}
private static List<Rectangle> toAnnotationRectangles(PositionOnPage positionsOnPage) {
return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList();
return positionsOnPage.getRectanglePerLine()
.stream()
.map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber()))
.toList();
}

View File

@ -43,7 +43,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
OffsetDateTime start = OffsetDateTime.now();
String reason = "whatever";
Document document = buildGraphNoImages("files/new/crafted document.pdf");
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList();
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document)
.peek(e -> e.apply("T.0.0", reason))
.toList();
assertFalse(entities.isEmpty());
TextEntity entity = entities.get(0);
assertTrue(entity.active());
@ -52,7 +54,8 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
assertFalse(entity.resized());
assertFalse(entity.ignored());
assertEquals("n-a", entity.getMatchedRule().getLegalBasis());
String annotationId = entity.getPositionsOnPagePerPage().get(0).getId();
String annotationId = entity.getPositionsOnPagePerPage()
.get(0).getId();
// remove first
IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build();
@ -73,7 +76,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
assertFalse(entity.ignored());
assertFalse(entity.removed());
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions());
assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals("coolio",
entity.getManualOverwrite().getLegalBasis()
.orElse(entity.getMatchedRule().getLegalBasis()));
// remove again
IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).build();
@ -93,7 +98,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
assertTrue(entity.ignored());
assertFalse(entity.applied());
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override",
entity.buildReasonWithManualChangeDescriptions());
entity.buildReasonWithManualChangeDescriptions());
String legalBasis = "Yeah";
String section = "Some random section!";
@ -110,10 +115,16 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
assertTrue(entity.ignored());
assertFalse(entity.applied());
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override, legal basis was manually changed",
entity.buildReasonWithManualChangeDescriptions());
assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue()));
assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()));
entity.buildReasonWithManualChangeDescriptions());
assertEquals(value,
entity.getManualOverwrite().getValue()
.orElse(entity.getValue()));
assertEquals(legalBasis,
entity.getManualOverwrite().getLegalBasis()
.orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals(section,
entity.getManualOverwrite().getSection()
.orElse(entity.getDeepestFullyContainingNode().toString()));
ManualRecategorization imageRecategorizationRequest = ManualRecategorization.builder()
.type("type")
@ -122,9 +133,13 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
.legalBasis("")
.build();
entity.getManualOverwrite().addChange(imageRecategorizationRequest);
assertTrue(entity.getManualOverwrite().getRecategorized().isPresent());
assertTrue(entity.getManualOverwrite().getRecategorized().get());
assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.type()));
assertTrue(entity.getManualOverwrite().getRecategorized()
.isPresent());
assertTrue(entity.getManualOverwrite().getRecategorized()
.get());
assertEquals("type",
entity.getManualOverwrite().getType()
.orElse(entity.type()));
}
@ -134,7 +149,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
OffsetDateTime start = OffsetDateTime.now();
String reason = "whatever";
Document document = buildGraphNoImages("files/new/crafted document.pdf");
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document).peek(e -> e.apply("T.0.0", reason)).toList();
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document)
.peek(e -> e.apply("T.0.0", reason))
.toList();
assertFalse(entities.isEmpty());
TextEntity entity = entities.get(0);
assertTrue(entity.active());
@ -143,7 +160,8 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
assertFalse(entity.resized());
assertFalse(entity.ignored());
assertEquals("n-a", entity.getMatchedRule().getLegalBasis());
String annotationId = entity.getPositionsOnPagePerPage().get(0).getId();
String annotationId = entity.getPositionsOnPagePerPage()
.get(0).getId();
// remove first
IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build();
@ -164,7 +182,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
assertFalse(entity.ignored());
assertFalse(entity.removed());
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions());
assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals("coolio",
entity.getManualOverwrite().getLegalBasis()
.orElse(entity.getMatchedRule().getLegalBasis()));
}

View File

@ -122,17 +122,20 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
assertTrue(document.getEntities().isEmpty());
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(),
document,
TEST_DOSSIER_TEMPLATE_ID);
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder()
.entriesToAdd(Set.of(
manualRedactionEntry))
.build(),
document,
TEST_DOSSIER_TEMPLATE_ID);
assertEquals(1, notFoundManualEntities.size());
assertTrue(document.getEntities().isEmpty());
List<EntityLogEntry> redactionLogEntries = entityLogCreatorService.createInitialEntityLog(new AnalyzeRequest(),
document,
notFoundManualEntities,
new DictionaryVersion(),
0L).getEntityLogEntry();
document,
notFoundManualEntities,
new DictionaryVersion(),
0L).getEntityLogEntry();
assertEquals(1, redactionLogEntries.size());
assertEquals(value, redactionLogEntries.get(0).getValue());
@ -146,7 +149,8 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
Document document = buildGraph("files/new/VV-919901.pdf");
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
List<TextEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
List<TextEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document)
.toList();
assertFalse(tempEntities.isEmpty());
var tempEntity = tempEntities.get(0);
List<Rectangle> positions = tempEntity.getPositionsOnPagePerPage()
@ -172,21 +176,28 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
tempEntity.removeFromGraph();
assertTrue(document.getEntities().isEmpty());
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(),
document,
TEST_DOSSIER_TEMPLATE_ID);
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder()
.entriesToAdd(Set.of(
manualRedactionEntry))
.build(),
document,
TEST_DOSSIER_TEMPLATE_ID);
assertTrue(notFoundManualEntities.isEmpty());
assertEquals(1, document.getEntities().size());
return new DocumentAndEntity(document, document.getEntities().stream().findFirst().get());
return new DocumentAndEntity(document,
document.getEntities()
.stream()
.findFirst()
.get());
}
public static Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
(float) rectangle2D.getWidth(),
-(float) rectangle2D.getHeight(),
pageNumber);
(float) rectangle2D.getWidth(),
-(float) rectangle2D.getHeight(),
pageNumber);
}

View File

@ -22,7 +22,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
public static final String FILE_NAME = "test-file";
@Test
@SneakyThrows
public void testFile() {
@ -41,13 +40,15 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
redactionServiceSettings.setNerServiceEnabled(false);
}
AnalyzeRequest ar = AnalyzeRequest.builder().fileId(FILE_ID).dossierId(DOSSIER_ID)
.analysisNumber(1).dossierTemplateId(DOSSIER_TEMPLATE_ID)
AnalyzeRequest ar = AnalyzeRequest.builder()
.fileId(FILE_ID)
.dossierId(DOSSIER_ID)
.analysisNumber(1)
.dossierTemplateId(DOSSIER_TEMPLATE_ID)
.lastProcessed(OffsetDateTime.now())
.fileAttributes(List.of())
.build();
// try {
// var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream();
// } catch (Exception e) {

View File

@ -159,14 +159,19 @@ public class LiveDataIntegrationTest {
when(dictionaryClient.getAllTypesForDossier(anyString(), anyBoolean())).thenReturn(new ArrayList<>());
when(dictionaryClient.getColors(anyString())).thenReturn(objectMapper.readValue(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "colors.json").getInputStream(),
Colors.class));
Colors.class));
when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
String typeName = answer.getArgument(0);
var found = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst();
var found = types.stream()
.filter(t -> t.getType().equalsIgnoreCase(typeName))
.findFirst();
if (found.isPresent()) {
var type = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().get();
var type = types.stream()
.filter(t -> t.getType().equalsIgnoreCase(typeName))
.findFirst()
.get();
type.setEntries(getEntries(typeName, type.getTypeId()));
type.setFalsePositiveEntries(getFalsePositiveEntries(typeName, type.getTypeId()));
@ -188,13 +193,20 @@ public class LiveDataIntegrationTest {
when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
String typeName = answer.getArgument(0);
var found = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst();
var found = types.stream()
.filter(t -> t.getType().equalsIgnoreCase(typeName))
.findFirst();
if (found.isPresent()) {
var type = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().get();
var type = types.stream()
.filter(t -> t.getType().equalsIgnoreCase(typeName))
.findFirst()
.get();
if (deltaTypeName.equalsIgnoreCase(typeName)) {
type.setEntries(values.stream().map(l -> new DictionaryEntry(0, l, version, false, type.getTypeId())).collect(Collectors.toList()));
type.setEntries(values.stream()
.map(l -> new DictionaryEntry(0, l, version, false, type.getTypeId()))
.collect(Collectors.toList()));
} else {
type.setEntries(new ArrayList<>());
}
@ -226,7 +238,9 @@ public class LiveDataIntegrationTest {
if (Objects.requireNonNull(resource.getFilename()).contains(typeName) && !resource.getFilename().contains("false_positive")) {
List<String> lines = IOUtils.readLines(resource.getInputStream());
return lines.stream().map(l -> new DictionaryEntry(0, l, 0L, false, typeId)).collect(Collectors.toList());
return lines.stream()
.map(l -> new DictionaryEntry(0, l, 0L, false, typeId))
.collect(Collectors.toList());
}
}
@ -242,7 +256,9 @@ public class LiveDataIntegrationTest {
if (Objects.requireNonNull(resource.getFilename()).contains(typeName) && resource.getFilename().contains("false_positive")) {
List<String> lines = IOUtils.readLines(resource.getInputStream());
return lines.stream().map(l -> new DictionaryEntry(0, l, 0L, false, typeId)).collect(Collectors.toList());
return lines.stream()
.map(l -> new DictionaryEntry(0, l, 0L, false, typeId))
.collect(Collectors.toList());
}
}

View File

@ -68,13 +68,13 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
Document document = buildGraphNoImages(filePath);
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document);
assertFalse(entityRecognitionEntities.isEmpty());
assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
assertTrue(entityRecognitionEntities.stream()
.allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
ClassPathResource resource = new ClassPathResource(filePath);
try (PDDocument pdDocument = Loader.loadPDF(resource.getFile())) {
Stream<NerEntities.NerEntity> unchangedAddressParts = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
.getNerEntityList()
Stream<NerEntities.NerEntity> unchangedAddressParts = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document).getNerEntityList()
.stream()
.filter(e -> !e.type().equals("CBI_author"));
List<TextEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
@ -83,12 +83,15 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
.map(Optional::get)
.toList();
redactionEntities.stream()
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
.collect(Collectors.groupingBy(e -> e.getPages()
.stream()
.findFirst()
.get().getNumber()))
.forEach((pageNumber, entities) -> drawNerEntitiesAsPartsAndCombined(pageNumber,
getPositionsFromEntityOfType("CBI_author", entities),
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
getPositionsFromEntityOfType("CBI_address", entities),
pdDocument));
getPositionsFromEntityOfType("CBI_author", entities),
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
getPositionsFromEntityOfType("CBI_address", entities),
pdDocument));
File outputFile = new File("/tmp/nerEntities.pdf");
pdDocument.save(outputFile);
@ -109,7 +112,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
log.info("Parsed NerEntitiesModel");
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
log.info("Validated and mapped");
List<TextRange> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList();
List<TextRange> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.toList();
log.info("Combined to CBI_address");
List<TextEntity> cbiAddressEntities = nerEntityBoundaries.stream()
.map(b -> entityCreationService.byTextRange(b, "CBI_address", EntityType.RECOMMENDATION, document))
@ -117,25 +121,28 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
.map(Optional::get)
.toList();
assertFalse(cbiAddressEntities.isEmpty());
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end()));
assertTrue(cbiAddressEntities.stream()
.allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end()));
ClassPathResource resource = new ClassPathResource(filePath);
try (PDDocument pdDocument = Loader.loadPDF(resource.getFile())) {
List<TextEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
.getNerEntityList()
List<TextEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document).getNerEntityList()
.stream()
.map(e -> entityCreationService.byTextRange(e.textRange(), e.type(), EntityType.ENTITY, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
Stream.concat(cbiAddressEntities.stream(), validatedEntities.stream())
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
.collect(Collectors.groupingBy(e -> e.getPages()
.stream()
.findFirst()
.get().getNumber()))
.forEach((pageNumber, entities) -> drawNerEntitiesAsPartsAndCombined(pageNumber,
getPositionsFromEntityOfType("CBI_author", entities),
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
getPositionsFromEntityOfType("CBI_address", entities),
pdDocument));
getPositionsFromEntityOfType("CBI_author", entities),
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
getPositionsFromEntityOfType("CBI_address", entities),
pdDocument));
File outputFile = new File("/tmp/nerEntities.pdf");
pdDocument.save(outputFile);
@ -147,11 +154,13 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author").toList();
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author")
.toList();
Stream<NerEntities.NerEntity> cbiAddress = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
return Stream.concat(cbiAuthors.stream(), cbiAddress).toList();
return Stream.concat(cbiAuthors.stream(), cbiAddress)
.toList();
}
@ -167,14 +176,17 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
private List<Rectangle2D> getPositionsFromEntityOfType(String type, List<TextEntity> entities) {
return getPositionsFromEntities(entities.stream().filter(e -> e.type().equals(type)));
return getPositionsFromEntities(entities.stream()
.filter(e -> e.type().equals(type)));
}
private List<Rectangle2D> getPositionsFromEntityNotOfType(List<String> types, List<TextEntity> entities) {
return getPositionsFromEntities(entities.stream().filter(e -> types.stream().noneMatch(type -> e.type().equals(type))));
return getPositionsFromEntities(entities.stream()
.filter(e -> types.stream()
.noneMatch(type -> e.type().equals(type))));
}
@ -188,13 +200,13 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, cbiAuthorRects, PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.blue).build());
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
pageNumber,
addressPartsRects,
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.MAGENTA).build());
pageNumber,
addressPartsRects,
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.MAGENTA).build());
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
pageNumber,
cbiAddressRects,
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.green).build());
pageNumber,
cbiAddressRects,
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.green).build());
}

View File

@ -30,7 +30,13 @@ public class RegExPatternTest {
@Test
public void testEmailRegEx() {
String text = "Address: Schwarzwaldalle " + "P.O.Box\n" + "CH-4002 Basel\n" + "Switzerland\n" + "Contact: Christian Warmers\n" + "Tel: +41 (61) 323 8044\n" + "christian.warmers@syngenta.com";
String text = "Address: Schwarzwaldalle "
+ "P.O.Box\n"
+ "CH-4002 Basel\n"
+ "Switzerland\n"
+ "Contact: Christian Warmers\n"
+ "Tel: +41 (61) 323 8044\n"
+ "christian.warmers@syngenta.com";
Pattern p = Pattern.compile("\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}\\b", Pattern.CASE_INSENSITIVE);
@ -46,7 +52,31 @@ public class RegExPatternTest {
@Test
public void testEtAlRegEx() {
String text = "To assess the potential of S-metolachlor to cause endocrine disruption (ED) a review (Charlton 2014,\n" + "ASB2016-762) was submitted that summarises results from regulatory and open scientific literature\n" + "studies covering in vitro and in vivo studies (level 2-5 of the OECD Conceptual Framework). According to this information metolachlor increased (1.5-fold) aromatase activity in JEG-3 cells (Laville et al.\n" + "2006, ASB2010-14391) and induced weak anti-androgenic activity in the MDA-kb2 reporter cell line\n" + "with a IC50 of 9.92 µM (IC50 of positive control flutamide: 0.51 µM) (Aït-Aïssa et al. 2010, ASB2015-\n" + "9562). Data from the Tox21 high throughput screening revealed just few postive findings in assays to\n" + "identify antagonists of the androgen receptor. An isolated result of this screening showed agonistic\n" + "activity on the thyroid stimulating hormone receptor, while Dalton et al. (2003, ASB2018-2832)\n" + "demonstrated that metolachlor induced CYP2B1/2 and CYP3A1/2 but did not affect T4, T3 or TSH.\n" + "After prepubertal exposure of male Wistar rats to metolachlor (Mathias et al. 2012, ASB2016-9890) a\n" + "statistically significant increase of serum hormone concentration was observed for testosterone (at the\n" + "dose 50 mg/kg) as well as a statistically significant decrease in the age of preputial separation at a dose\n" + "of 5 and 50 mg/kg. Furthermore a statistically significant increase for estradiol at a dose of 50 mg/kg\n" + "and for FSH at a dose of 5 and 50 mg/kg and morphological alterations of the seminiferous epithelium\n" + "were observed. Relative testicular weight was not altered. A statistically significant increase of relative\n" + "weights was observed in long-term studies with rats (Tisdel et al. 1983, TOX9800328 ). This finding\n" + "was attributed to lower terminal body weight. In mice a statistically significant decrease of the weight\n" + "seminal vesicle (Tisdel et al. 
1982, TOX9800327) was shown after 24 month treatment with\n" + "metolachlor. In a mouse preimplantation embryo assay from open literature metolachlor increased the\n" + "percentage of apoptosis significantly and reduced the mean number of cells per embryo significantly\n" + "while the percentage of developing blastocytes was unaltered (Grennlee et al. 2004, ASB2016-9889).\n" + "In reproduvtive toxicity studies a retarded body weight development of the pups was observed, while\n" + "survival and normal morphological and functional development were not altered. No adverse effects\n" + "on male fertility were seen, however important parameters to assess effects on female fertility like\n" + "cyclicity, ovarian follicles as well as developmental landmarks in the offspring have not been investigated.";
String text = "To assess the potential of S-metolachlor to cause endocrine disruption (ED) a review (Charlton 2014,\n"
+ "ASB2016-762) was submitted that summarises results from regulatory and open scientific literature\n"
+ "studies covering in vitro and in vivo studies (level 2-5 of the OECD Conceptual Framework). According to this information metolachlor increased (1.5-fold) aromatase activity in JEG-3 cells (Laville et al.\n"
+ "2006, ASB2010-14391) and induced weak anti-androgenic activity in the MDA-kb2 reporter cell line\n"
+ "with a IC50 of 9.92 µM (IC50 of positive control flutamide: 0.51 µM) (Aït-Aïssa et al. 2010, ASB2015-\n"
+ "9562). Data from the Tox21 high throughput screening revealed just few postive findings in assays to\n"
+ "identify antagonists of the androgen receptor. An isolated result of this screening showed agonistic\n"
+ "activity on the thyroid stimulating hormone receptor, while Dalton et al. (2003, ASB2018-2832)\n"
+ "demonstrated that metolachlor induced CYP2B1/2 and CYP3A1/2 but did not affect T4, T3 or TSH.\n"
+ "After prepubertal exposure of male Wistar rats to metolachlor (Mathias et al. 2012, ASB2016-9890) a\n"
+ "statistically significant increase of serum hormone concentration was observed for testosterone (at the\n"
+ "dose 50 mg/kg) as well as a statistically significant decrease in the age of preputial separation at a dose\n"
+ "of 5 and 50 mg/kg. Furthermore a statistically significant increase for estradiol at a dose of 50 mg/kg\n"
+ "and for FSH at a dose of 5 and 50 mg/kg and morphological alterations of the seminiferous epithelium\n"
+ "were observed. Relative testicular weight was not altered. A statistically significant increase of relative\n"
+ "weights was observed in long-term studies with rats (Tisdel et al. 1983, TOX9800328 ). This finding\n"
+ "was attributed to lower terminal body weight. In mice a statistically significant decrease of the weight\n"
+ "seminal vesicle (Tisdel et al. 1982, TOX9800327) was shown after 24 month treatment with\n"
+ "metolachlor. In a mouse preimplantation embryo assay from open literature metolachlor increased the\n"
+ "percentage of apoptosis significantly and reduced the mean number of cells per embryo significantly\n"
+ "while the percentage of developing blastocytes was unaltered (Grennlee et al. 2004, ASB2016-9889).\n"
+ "In reproduvtive toxicity studies a retarded body weight development of the pups was observed, while\n"
+ "survival and normal morphological and functional development were not altered. No adverse effects\n"
+ "on male fertility were seen, however important parameters to assess effects on female fertility like\n"
+ "cyclicity, ovarian follicles as well as developmental landmarks in the offspring have not been investigated.";
Pattern p = Pattern.compile("([^\\s(]*?( \\w\\.?)?) et al\\.?");
@ -64,7 +94,6 @@ public class RegExPatternTest {
String word = "Porch JR, " + "Kendall TZ, " + "Krueger HO";
Pattern pattern = Pattern.compile("[A-ZÄÖÜ][\\wäöüéèê]{2,}( [A-ZÄÖÜ]{1,2}\\.)+");
Matcher matcher = pattern.matcher(word);

Some files were not shown because too many files have changed in this diff Show More