reformat #303
@ -15,4 +15,5 @@ public class AnalyzeResponse {
|
||||
|
||||
private String fileId;
|
||||
private List<UnprocessedManualEntity> unprocessedManualEntities;
|
||||
|
||||
}
|
||||
|
||||
@ -17,4 +17,5 @@ public class DroolsSyntaxDeprecatedWarnings {
|
||||
Integer line;
|
||||
Integer column;
|
||||
String message;
|
||||
|
||||
}
|
||||
|
||||
@ -17,4 +17,5 @@ public class DroolsSyntaxErrorMessage {
|
||||
Integer line;
|
||||
Integer column;
|
||||
String message;
|
||||
|
||||
}
|
||||
|
||||
@ -28,6 +28,7 @@ public class DroolsSyntaxValidation {
|
||||
getDroolsSyntaxErrorMessages().add(DroolsSyntaxErrorMessage.builder().line(line).column(column).message(message).build());
|
||||
}
|
||||
|
||||
|
||||
public boolean isCompiled() {
|
||||
|
||||
return droolsSyntaxErrorMessages.isEmpty();
|
||||
|
||||
@ -24,4 +24,5 @@ public class UnprocessedManualEntity {
|
||||
private String section;
|
||||
@Builder.Default
|
||||
private List<Position> positions = new ArrayList<>();
|
||||
|
||||
}
|
||||
|
||||
@ -35,11 +35,14 @@ public class Application {
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public ObservedAspect observedAspect(ObservationRegistry observationRegistry) {
|
||||
|
||||
return new ObservedAspect(observationRegistry);
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public TimedAspect timedAspect(MeterRegistry registry) {
|
||||
|
||||
|
||||
@ -95,6 +95,7 @@ public class DeprecatedElementsFinder {
|
||||
return this.deprecatedClasses;
|
||||
}
|
||||
|
||||
|
||||
private String getMethodSignature(Method method) {
|
||||
|
||||
String methodName = method.getName();
|
||||
|
||||
@ -16,7 +16,7 @@ public class RedisCachingConfiguration {
|
||||
public RedisCacheManagerBuilderCustomizer redisCacheManagerBuilderCustomizer() {
|
||||
|
||||
return (builder) -> builder.withCacheConfiguration("documentDataCache",
|
||||
RedisCacheConfiguration.defaultCacheConfig().entryTtl(Duration.ofMinutes(30)).disableCachingNullValues());
|
||||
RedisCacheConfiguration.defaultCacheConfig().entryTtl(Duration.ofMinutes(30)).disableCachingNullValues());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client.model;
|
||||
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@ -13,7 +13,6 @@ import lombok.RequiredArgsConstructor;
|
||||
@RequiredArgsConstructor
|
||||
public class RuleBuilderController implements RuleBuilderResource {
|
||||
|
||||
|
||||
@Override
|
||||
public RuleBuilderModel getRuleBuilderModel() {
|
||||
|
||||
|
||||
@ -92,11 +92,16 @@ public class LegacyRedactionLogMergeService {
|
||||
return redactionLog;
|
||||
}
|
||||
|
||||
|
||||
public long getNumberOfAffectedAnnotations(ManualRedactions manualRedactions) {
|
||||
|
||||
return createManualRedactionWrappers(manualRedactions).stream().map(ManualRedactionWrapper::getId).distinct().count();
|
||||
return createManualRedactionWrappers(manualRedactions).stream()
|
||||
.map(ManualRedactionWrapper::getId)
|
||||
.distinct()
|
||||
.count();
|
||||
}
|
||||
|
||||
|
||||
private List<ManualRedactionWrapper> createManualRedactionWrappers(ManualRedactions manualRedactions) {
|
||||
|
||||
List<ManualRedactionWrapper> manualRedactionWrappers = new ArrayList<>();
|
||||
|
||||
@ -21,7 +21,9 @@ public class LegacyVersion0MigrationService {
|
||||
public RedactionLog mergeDuplicateAnnotationIds(RedactionLog redactionLog) {
|
||||
|
||||
List<RedactionLogEntry> mergedEntries = new LinkedList<>();
|
||||
Map<String, List<RedactionLogEntry>> entriesById = redactionLog.getRedactionLogEntry().stream().collect(Collectors.groupingBy(RedactionLogEntry::getId));
|
||||
Map<String, List<RedactionLogEntry>> entriesById = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.collect(Collectors.groupingBy(RedactionLogEntry::getId));
|
||||
for (List<RedactionLogEntry> entries : entriesById.values()) {
|
||||
|
||||
if (entries.isEmpty()) {
|
||||
@ -33,7 +35,10 @@ public class LegacyVersion0MigrationService {
|
||||
continue;
|
||||
}
|
||||
|
||||
List<RedactionLogEntry> sortedEntries = entries.stream().sorted(Comparator.comparing(entry -> entry.getChanges().get(0).getDateTime())).toList();
|
||||
List<RedactionLogEntry> sortedEntries = entries.stream()
|
||||
.sorted(Comparator.comparing(entry -> entry.getChanges()
|
||||
.get(0).getDateTime()))
|
||||
.toList();
|
||||
|
||||
RedactionLogEntry initialEntry = sortedEntries.get(0);
|
||||
for (RedactionLogEntry entry : sortedEntries.subList(1, sortedEntries.size())) {
|
||||
|
||||
@ -14,4 +14,5 @@ public record KieWrapper(KieContainer container, long rulesVersion) {
|
||||
|
||||
return container != null && rulesVersion >= 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -19,4 +19,5 @@ public class MigratedEntityLog {
|
||||
|
||||
MigratedIds migratedIds;
|
||||
EntityLog entityLog;
|
||||
|
||||
}
|
||||
|
||||
@ -27,13 +27,15 @@ public class NerEntities {
|
||||
|
||||
public boolean hasEntitiesOfType(String type) {
|
||||
|
||||
return nerEntityList.stream().anyMatch(nerEntity -> nerEntity.type.equals(type));
|
||||
return nerEntityList.stream()
|
||||
.anyMatch(nerEntity -> nerEntity.type.equals(type));
|
||||
}
|
||||
|
||||
|
||||
public Stream<NerEntity> streamEntitiesOfType(String type) {
|
||||
|
||||
return nerEntityList.stream().filter(nerEntity -> nerEntity.type().equals(type));
|
||||
return nerEntityList.stream()
|
||||
.filter(nerEntity -> nerEntity.type().equals(type));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -88,7 +88,8 @@ public class Entity {
|
||||
.textAfter(e.getTextAfter())
|
||||
.startOffset(e.getStartOffset())
|
||||
.endOffset(e.getEndOffset())
|
||||
.length(Optional.ofNullable(e.getValue()).orElse("").length())
|
||||
.length(Optional.ofNullable(e.getValue())
|
||||
.orElse("").length())
|
||||
.imageHasTransparency(e.isImageHasTransparency())
|
||||
.isDictionaryEntry(e.isDictionaryEntry())
|
||||
.isDossierDictionaryEntry(e.isDossierDictionaryEntry())
|
||||
|
||||
@ -53,7 +53,8 @@ public class Dictionary {
|
||||
|
||||
public boolean hasLocalEntries() {
|
||||
|
||||
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntriesWithMatchedRules().isEmpty());
|
||||
return dictionaryModels.stream()
|
||||
.anyMatch(dm -> !dm.getLocalEntriesWithMatchedRules().isEmpty());
|
||||
}
|
||||
|
||||
|
||||
@ -116,12 +117,18 @@ public class Dictionary {
|
||||
}
|
||||
localAccessMap.get(type)
|
||||
.getLocalEntriesWithMatchedRules()
|
||||
.merge(cleanedValue.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
|
||||
.merge(cleanedValue.trim(),
|
||||
matchedRulesSet,
|
||||
(set1, set2) -> Stream.concat(set1.stream(), set2.stream())
|
||||
.collect(Collectors.toSet()));
|
||||
if (alsoAddLastname) {
|
||||
String lastname = cleanedValue.split(" ")[0];
|
||||
localAccessMap.get(type)
|
||||
.getLocalEntriesWithMatchedRules()
|
||||
.merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
|
||||
.merge(lastname,
|
||||
matchedRulesSet,
|
||||
(set1, set2) -> Stream.concat(set1.stream(), set2.stream())
|
||||
.collect(Collectors.toSet()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -153,7 +160,10 @@ public class Dictionary {
|
||||
} else {
|
||||
splitAuthorNames = Arrays.asList(textEntity.getValueWithLineBreaks().split("\n"));
|
||||
}
|
||||
return splitAuthorNames.stream().map(String::trim).filter(authorName -> Patterns.AUTHOR_NAME_PATTERN.matcher(authorName).matches()).toList();
|
||||
return splitAuthorNames.stream()
|
||||
.map(String::trim)
|
||||
.filter(authorName -> Patterns.AUTHOR_NAME_PATTERN.matcher(authorName).matches())
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -57,14 +57,18 @@ public class DictionaryModel implements Serializable {
|
||||
this.falsePositives = falsePositives;
|
||||
this.falseRecommendations = falseRecommendations;
|
||||
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntryModel::getValue).collect(Collectors.toList()),
|
||||
caseInsensitive);
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntryModel::getValue).collect(Collectors.toList()),
|
||||
caseInsensitive);
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntryModel::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntryModel::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
|
||||
}
|
||||
|
||||
@ -81,8 +85,10 @@ public class DictionaryModel implements Serializable {
|
||||
public SearchImplementation getEntriesSearch() {
|
||||
|
||||
if (entriesSearch == null) {
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
|
||||
caseInsensitive);
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
}
|
||||
return entriesSearch;
|
||||
}
|
||||
@ -92,9 +98,9 @@ public class DictionaryModel implements Serializable {
|
||||
|
||||
if (falsePositiveSearch == null) {
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
}
|
||||
return falsePositiveSearch;
|
||||
}
|
||||
@ -104,14 +110,16 @@ public class DictionaryModel implements Serializable {
|
||||
|
||||
if (falseRecommendationsSearch == null) {
|
||||
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
}
|
||||
return falseRecommendationsSearch;
|
||||
}
|
||||
|
||||
|
||||
public Set<MatchedRule> getMatchedRulesForLocalDictionaryEntry(String value) {
|
||||
|
||||
var cleanedValue = isCaseInsensitive() ? value.toLowerCase(Locale.US) : value;
|
||||
|
||||
return localEntriesWithMatchedRules.get(cleanedValue);
|
||||
|
||||
@ -76,7 +76,9 @@ public class SearchImplementation {
|
||||
if (ignoreCase) {
|
||||
textToCheck = textToCheck.toLowerCase(Locale.ROOT);
|
||||
}
|
||||
return this.pattern.matcher(textToCheck).results().findAny().isPresent();
|
||||
return this.pattern.matcher(textToCheck).results()
|
||||
.findAny()
|
||||
.isPresent();
|
||||
} else {
|
||||
return this.trie.containsMatch(textToCheck);
|
||||
}
|
||||
@ -89,9 +91,14 @@ public class SearchImplementation {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
if (this.pattern != null) {
|
||||
return this.pattern.matcher(text).results().map(r -> new TextRange(r.start(), r.end())).collect(Collectors.toList());
|
||||
return this.pattern.matcher(text).results()
|
||||
.map(r -> new TextRange(r.start(), r.end()))
|
||||
.collect(Collectors.toList());
|
||||
} else {
|
||||
return this.trie.parseText(text).stream().map(r -> new TextRange(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
|
||||
return this.trie.parseText(text)
|
||||
.stream()
|
||||
.map(r -> new TextRange(r.getStart(), r.getEnd() + 1))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,9 +110,14 @@ public class SearchImplementation {
|
||||
}
|
||||
CharSequence subSequence = text.subSequence(region.start(), region.end());
|
||||
if (this.pattern != null) {
|
||||
return this.pattern.matcher(subSequence).results().map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList());
|
||||
return this.pattern.matcher(subSequence).results()
|
||||
.map(r -> new TextRange(r.start() + region.start(), r.end() + region.start()))
|
||||
.collect(Collectors.toList());
|
||||
} else {
|
||||
return this.trie.parseText(subSequence).stream().map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList());
|
||||
return this.trie.parseText(subSequence)
|
||||
.stream()
|
||||
.map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
@ -120,9 +132,14 @@ public class SearchImplementation {
|
||||
if (ignoreCase) {
|
||||
textToCheck = textToCheck.toLowerCase(Locale.ROOT);
|
||||
}
|
||||
return this.pattern.matcher(textToCheck).results().map(r -> new MatchPosition(r.start(), r.end())).collect(Collectors.toList());
|
||||
return this.pattern.matcher(textToCheck).results()
|
||||
.map(r -> new MatchPosition(r.start(), r.end()))
|
||||
.collect(Collectors.toList());
|
||||
} else {
|
||||
return this.trie.parseText(textToCheck).stream().map(r -> new MatchPosition(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
|
||||
return this.trie.parseText(textToCheck)
|
||||
.stream()
|
||||
.map(r -> new MatchPosition(r.getStart(), r.getEnd() + 1))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -40,7 +40,10 @@ public class DocumentTree {
|
||||
|
||||
public TextBlock buildTextBlock() {
|
||||
|
||||
return allEntriesInOrder().map(Entry::getNode).filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
return allEntriesInOrder().map(Entry::getNode)
|
||||
.filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -114,13 +117,16 @@ public class DocumentTree {
|
||||
|
||||
public Stream<SemanticNode> childNodes(List<Integer> treeId) {
|
||||
|
||||
return getEntryById(treeId).children.stream().map(Entry::getNode);
|
||||
return getEntryById(treeId).children.stream()
|
||||
.map(Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
public Stream<SemanticNode> childNodesOfType(List<Integer> treeId, NodeType nodeType) {
|
||||
|
||||
return getEntryById(treeId).children.stream().filter(entry -> entry.node.getType().equals(nodeType)).map(Entry::getNode);
|
||||
return getEntryById(treeId).children.stream()
|
||||
.filter(entry -> entry.node.getType().equals(nodeType))
|
||||
.map(Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
@ -199,26 +205,32 @@ public class DocumentTree {
|
||||
|
||||
public Stream<Entry> allEntriesInOrder() {
|
||||
|
||||
return Stream.of(root).flatMap(DocumentTree::flatten);
|
||||
return Stream.of(root)
|
||||
.flatMap(DocumentTree::flatten);
|
||||
}
|
||||
|
||||
|
||||
public Stream<Entry> allSubEntriesInOrder(List<Integer> parentId) {
|
||||
|
||||
return getEntryById(parentId).children.stream().flatMap(DocumentTree::flatten);
|
||||
return getEntryById(parentId).children.stream()
|
||||
.flatMap(DocumentTree::flatten);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return String.join("\n", allEntriesInOrder().map(Entry::toString).toList());
|
||||
return String.join("\n",
|
||||
allEntriesInOrder().map(Entry::toString)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Stream<Entry> flatten(Entry entry) {
|
||||
|
||||
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentTree::flatten));
|
||||
return Stream.concat(Stream.of(entry),
|
||||
entry.children.stream()
|
||||
.flatMap(DocumentTree::flatten));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -92,8 +92,13 @@ public class TextRange implements Comparable<TextRange> {
|
||||
|
||||
public List<TextRange> split(List<Integer> splitIndices) {
|
||||
|
||||
if (splitIndices.stream().anyMatch(idx -> !this.contains(idx))) {
|
||||
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s", splitIndices.stream().filter(idx -> !this.contains(idx)).toList(), this));
|
||||
if (splitIndices.stream()
|
||||
.anyMatch(idx -> !this.contains(idx))) {
|
||||
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s",
|
||||
splitIndices.stream()
|
||||
.filter(idx -> !this.contains(idx))
|
||||
.toList(),
|
||||
this));
|
||||
}
|
||||
List<TextRange> splitBoundaries = new LinkedList<>();
|
||||
int previousIndex = start;
|
||||
@ -113,8 +118,14 @@ public class TextRange implements Comparable<TextRange> {
|
||||
|
||||
public static TextRange merge(Collection<TextRange> boundaries) {
|
||||
|
||||
int minStart = boundaries.stream().mapToInt(TextRange::start).min().orElseThrow(IllegalArgumentException::new);
|
||||
int maxEnd = boundaries.stream().mapToInt(TextRange::end).max().orElseThrow(IllegalArgumentException::new);
|
||||
int minStart = boundaries.stream()
|
||||
.mapToInt(TextRange::start)
|
||||
.min()
|
||||
.orElseThrow(IllegalArgumentException::new);
|
||||
int maxEnd = boundaries.stream()
|
||||
.mapToInt(TextRange::end)
|
||||
.max()
|
||||
.orElseThrow(IllegalArgumentException::new);
|
||||
return new TextRange(minStart, maxEnd);
|
||||
}
|
||||
|
||||
|
||||
@ -35,14 +35,16 @@ public interface IEntity {
|
||||
|
||||
default String value() {
|
||||
|
||||
return getManualOverwrite().getValue().orElse(getValue() == null ? "" : getValue());
|
||||
return getManualOverwrite().getValue()
|
||||
.orElse(getValue() == null ? "" : getValue());
|
||||
}
|
||||
|
||||
|
||||
// Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
|
||||
default boolean applied() {
|
||||
|
||||
return getManualOverwrite().getApplied().orElse(getMatchedRule().isApplied());
|
||||
return getManualOverwrite().getApplied()
|
||||
.orElse(getMatchedRule().isApplied());
|
||||
}
|
||||
|
||||
|
||||
@ -54,19 +56,22 @@ public interface IEntity {
|
||||
|
||||
default boolean ignored() {
|
||||
|
||||
return getManualOverwrite().getIgnored().orElse(getMatchedRule().isIgnored());
|
||||
return getManualOverwrite().getIgnored()
|
||||
.orElse(getMatchedRule().isIgnored());
|
||||
}
|
||||
|
||||
|
||||
default boolean removed() {
|
||||
|
||||
return getManualOverwrite().getRemoved().orElse(getMatchedRule().isRemoved());
|
||||
return getManualOverwrite().getRemoved()
|
||||
.orElse(getMatchedRule().isRemoved());
|
||||
}
|
||||
|
||||
|
||||
default boolean resized() {
|
||||
|
||||
return getManualOverwrite().getResized().orElse(false);
|
||||
return getManualOverwrite().getResized()
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
|
||||
@ -133,12 +138,12 @@ public interface IEntity {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
getMatchedRuleList().add(MatchedRule.builder()
|
||||
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.reason(reason)
|
||||
.legalBasis(legalBasis)
|
||||
.applied(true)
|
||||
.writeValueWithLineBreaks(true)
|
||||
.build());
|
||||
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.reason(reason)
|
||||
.legalBasis(legalBasis)
|
||||
.applied(true)
|
||||
.writeValueWithLineBreaks(true)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
@ -148,12 +153,12 @@ public interface IEntity {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
getMatchedRuleList().add(MatchedRule.builder()
|
||||
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.reason(reason)
|
||||
.legalBasis(legalBasis)
|
||||
.applied(true)
|
||||
.references(new HashSet<>(references))
|
||||
.build());
|
||||
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.reason(reason)
|
||||
.legalBasis(legalBasis)
|
||||
.applied(true)
|
||||
.references(new HashSet<>(references))
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
@ -207,7 +212,8 @@ public interface IEntity {
|
||||
|
||||
default String legalBasis() {
|
||||
|
||||
return getManualOverwrite().getLegalBasis().orElse(getMatchedRule().getLegalBasis());
|
||||
return getManualOverwrite().getLegalBasis()
|
||||
.orElse(getMatchedRule().getLegalBasis());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -52,7 +52,8 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
if (!this.isApplied()) {
|
||||
return this;
|
||||
}
|
||||
return MatchedRule.builder().ruleIdentifier(getRuleIdentifier())
|
||||
return MatchedRule.builder()
|
||||
.ruleIdentifier(getRuleIdentifier())
|
||||
.writeValueWithLineBreaks(this.isWriteValueWithLineBreaks())
|
||||
.legalBasis(this.getLegalBasis())
|
||||
.reason(this.getReason())
|
||||
@ -97,7 +98,19 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return "MatchedRule[ruleIdentifier=" + ruleIdentifier + ", reason=" + reason + ", legalBasis=" + legalBasis + ", applied=" + applied + ", writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", references=" + references + ']';
|
||||
return "MatchedRule[ruleIdentifier="
|
||||
+ ruleIdentifier
|
||||
+ ", reason="
|
||||
+ reason
|
||||
+ ", legalBasis="
|
||||
+ legalBasis
|
||||
+ ", applied="
|
||||
+ applied
|
||||
+ ", writeValueWithLineBreaks="
|
||||
+ writeValueWithLineBreaks
|
||||
+ ", references="
|
||||
+ references
|
||||
+ ']';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -67,7 +67,13 @@ public class TextEntity implements IEntity {
|
||||
|
||||
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return TextEntity.builder().id(buildId(node, textRange, type, entityType)).type(type).entityType(entityType).textRange(textRange).manualOverwrite(new ManualChangeOverwrite(entityType)).build();
|
||||
return TextEntity.builder()
|
||||
.id(buildId(node, textRange, type, entityType))
|
||||
.type(type)
|
||||
.entityType(entityType)
|
||||
.textRange(textRange)
|
||||
.manualOverwrite(new ManualChangeOverwrite(entityType))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -80,7 +86,13 @@ public class TextEntity implements IEntity {
|
||||
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
|
||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(), rectanglesPerLinePerPage.values().stream().flatMap(Collection::stream).toList(), type, entityType.name());
|
||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
||||
rectanglesPerLinePerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.toList(),
|
||||
type,
|
||||
entityType.name());
|
||||
}
|
||||
|
||||
|
||||
@ -89,15 +101,18 @@ public class TextEntity implements IEntity {
|
||||
duplicateTextRanges.add(textRange);
|
||||
}
|
||||
|
||||
|
||||
public boolean occursInNodeOfType(Class<? extends SemanticNode> clazz) {
|
||||
|
||||
return intersectingNodes.stream().anyMatch(clazz::isInstance);
|
||||
return intersectingNodes.stream()
|
||||
.anyMatch(clazz::isInstance);
|
||||
}
|
||||
|
||||
|
||||
public boolean occursInNode(SemanticNode semanticNode) {
|
||||
|
||||
return intersectingNodes.stream().anyMatch(node -> node.equals(semanticNode));
|
||||
return intersectingNodes.stream()
|
||||
.anyMatch(node -> node.equals(semanticNode));
|
||||
}
|
||||
|
||||
|
||||
@ -146,7 +161,10 @@ public class TextEntity implements IEntity {
|
||||
.min(Comparator.comparingInt(Page::getNumber))
|
||||
.orElseThrow(() -> new RuntimeException("No Positions found on any page!"));
|
||||
|
||||
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildPositionOnPage(firstPage, id, entry)).toList();
|
||||
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet()
|
||||
.stream()
|
||||
.map(entry -> buildPositionOnPage(firstPage, id, entry))
|
||||
.toList();
|
||||
}
|
||||
return positionsOnPagePerPage;
|
||||
}
|
||||
@ -194,7 +212,8 @@ public class TextEntity implements IEntity {
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getPositionsOnPagePerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
return getPositionsOnPagePerPage().stream()
|
||||
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
}
|
||||
|
||||
|
||||
@ -224,14 +243,16 @@ public class TextEntity implements IEntity {
|
||||
@Override
|
||||
public String type() {
|
||||
|
||||
return getManualOverwrite().getType().orElse(type);
|
||||
return getManualOverwrite().getType()
|
||||
.orElse(type);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String value() {
|
||||
|
||||
return getManualOverwrite().getValue().orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
|
||||
return getManualOverwrite().getValue()
|
||||
.orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -65,13 +65,15 @@ public class Document implements GenericSemanticNode {
|
||||
|
||||
public List<Section> getMainSections() {
|
||||
|
||||
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node).collect(Collectors.toList());
|
||||
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
|
||||
|
||||
return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock);
|
||||
return streamAllNodes().filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock);
|
||||
}
|
||||
|
||||
|
||||
@ -92,13 +94,16 @@ public class Document implements GenericSemanticNode {
|
||||
@Override
|
||||
public Headline getHeadline() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseGet(Headline::empty);
|
||||
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node)
|
||||
.findFirst()
|
||||
.orElseGet(Headline::empty);
|
||||
}
|
||||
|
||||
|
||||
private Stream<SemanticNode> streamAllNodes() {
|
||||
|
||||
return documentTree.allEntriesInOrder().map(DocumentTree.Entry::getNode);
|
||||
return documentTree.allEntriesInOrder()
|
||||
.map(DocumentTree.Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -106,7 +106,9 @@ public class Headline implements GenericSemanticNode {
|
||||
|
||||
public boolean hasParagraphs() {
|
||||
|
||||
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
|
||||
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
.findFirst()
|
||||
.isPresent();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -136,7 +136,7 @@ public class Image implements GenericSemanticNode, IEntity {
|
||||
Map<Page, Rectangle2D> bboxImage = image.getBBox();
|
||||
Map<Page, Rectangle2D> bbox = this.getBBox();
|
||||
//image needs to be on the same page
|
||||
if(bboxImage.get(this.page) != null) {
|
||||
if (bboxImage.get(this.page) != null) {
|
||||
Rectangle2D intersection = bboxImage.get(this.page).createIntersection(bbox.get(this.page));
|
||||
double calculatedIntersection = intersection.getWidth() * intersection.getHeight();
|
||||
double area = bbox.get(this.page).getWidth() * bbox.get(this.page).getHeight();
|
||||
|
||||
@ -45,7 +45,10 @@ public class Page {
|
||||
|
||||
public TextBlock getMainBodyTextBlock() {
|
||||
|
||||
return mainBody.stream().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
return mainBody.stream()
|
||||
.filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -54,4 +57,5 @@ public class Page {
|
||||
|
||||
return String.valueOf(number);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -53,7 +53,8 @@ public class Section implements GenericSemanticNode {
|
||||
|
||||
public boolean hasTables() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.TABLE).findAny().isPresent();
|
||||
return streamAllSubNodesOfType(NodeType.TABLE).findAny()
|
||||
.isPresent();
|
||||
}
|
||||
|
||||
|
||||
@ -68,7 +69,9 @@ public class Section implements GenericSemanticNode {
|
||||
public TextBlock getTextBlock() {
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
return textBlock;
|
||||
}
|
||||
|
||||
@ -72,7 +72,11 @@ public class SectionIdentifier {
|
||||
}
|
||||
identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
|
||||
}
|
||||
return new SectionIdentifier(Format.NUMERICAL, identifierString, identifiers.stream().toList(), false);
|
||||
return new SectionIdentifier(Format.NUMERICAL,
|
||||
identifierString,
|
||||
identifiers.stream()
|
||||
.toList(),
|
||||
false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -71,7 +71,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Page getFirstPage() {
|
||||
|
||||
return getTextBlock().getPages().stream().min(Comparator.comparingInt(Page::getNumber)).orElseThrow();
|
||||
return getTextBlock().getPages()
|
||||
.stream()
|
||||
.min(Comparator.comparingInt(Page::getNumber))
|
||||
.orElseThrow();
|
||||
}
|
||||
|
||||
|
||||
@ -97,7 +100,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean onPage(int pageNumber) {
|
||||
|
||||
return getPages().stream().anyMatch(page -> page.getNumber() == pageNumber);
|
||||
return getPages().stream()
|
||||
.anyMatch(page -> page.getNumber() == pageNumber);
|
||||
}
|
||||
|
||||
|
||||
@ -249,7 +253,9 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfType(String type) {
|
||||
|
||||
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> redactionEntity.type().equals(type));
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.anyMatch(redactionEntity -> redactionEntity.type().equals(type));
|
||||
}
|
||||
|
||||
|
||||
@ -262,7 +268,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfAnyType(String... types) {
|
||||
|
||||
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.type().equals(type)));
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.anyMatch(redactionEntity -> Arrays.stream(types)
|
||||
.anyMatch(type -> redactionEntity.type().equals(type)));
|
||||
}
|
||||
|
||||
|
||||
@ -275,7 +284,12 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfAllTypes(String... types) {
|
||||
|
||||
return getEntities().stream().filter(TextEntity::active).map(TextEntity::type).collect(Collectors.toUnmodifiableSet()).containsAll(Arrays.stream(types).toList());
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.map(TextEntity::type)
|
||||
.collect(Collectors.toUnmodifiableSet())
|
||||
.containsAll(Arrays.stream(types)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
@ -288,7 +302,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default List<TextEntity> getEntitiesOfType(String type) {
|
||||
|
||||
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.type().equals(type)).toList();
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.filter(redactionEntity -> redactionEntity.type().equals(type))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -301,7 +318,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default List<TextEntity> getEntitiesOfType(List<String> types) {
|
||||
|
||||
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.filter(redactionEntity -> redactionEntity.isAnyType(types))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -314,7 +334,11 @@ public interface SemanticNode {
|
||||
*/
|
||||
default List<TextEntity> getEntitiesOfType(String... types) {
|
||||
|
||||
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
|
||||
return getEntities().stream()
|
||||
.filter(TextEntity::active)
|
||||
.filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types)
|
||||
.toList()))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -328,7 +352,8 @@ public interface SemanticNode {
|
||||
|
||||
TextBlock textBlock = getTextBlock();
|
||||
if (!textBlock.getAtomicTextBlocks().isEmpty()) {
|
||||
return getTextBlock().getAtomicTextBlocks().get(0).getNumberOnPage();
|
||||
return getTextBlock().getAtomicTextBlocks()
|
||||
.get(0).getNumberOnPage();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
@ -357,14 +382,16 @@ public interface SemanticNode {
|
||||
return getTextBlock().getSearchText().contains(string);
|
||||
}
|
||||
|
||||
|
||||
Set<LayoutEngine> getEngines();
|
||||
|
||||
|
||||
default void addEngine(LayoutEngine engine) {
|
||||
|
||||
getEngines().add(engine);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings.
|
||||
*
|
||||
@ -373,7 +400,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAllStrings(String... strings) {
|
||||
|
||||
return Arrays.stream(strings).allMatch(this::containsString);
|
||||
return Arrays.stream(strings)
|
||||
.allMatch(this::containsString);
|
||||
}
|
||||
|
||||
|
||||
@ -385,7 +413,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAnyString(String... strings) {
|
||||
|
||||
return Arrays.stream(strings).anyMatch(this::containsString);
|
||||
return Arrays.stream(strings)
|
||||
.anyMatch(this::containsString);
|
||||
}
|
||||
|
||||
|
||||
@ -397,7 +426,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAnyString(List<String> strings) {
|
||||
|
||||
return strings.stream().anyMatch(this::containsString);
|
||||
return strings.stream()
|
||||
.anyMatch(this::containsString);
|
||||
}
|
||||
|
||||
|
||||
@ -421,7 +451,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAnyStringIgnoreCase(String... strings) {
|
||||
|
||||
return Arrays.stream(strings).anyMatch(this::containsStringIgnoreCase);
|
||||
return Arrays.stream(strings)
|
||||
.anyMatch(this::containsStringIgnoreCase);
|
||||
}
|
||||
|
||||
|
||||
@ -433,7 +464,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAllStringsIgnoreCase(String... strings) {
|
||||
|
||||
return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase);
|
||||
return Arrays.stream(strings)
|
||||
.allMatch(this::containsStringIgnoreCase);
|
||||
}
|
||||
|
||||
|
||||
@ -445,7 +477,9 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsWord(String word) {
|
||||
|
||||
return getTextBlock().getWords().stream().anyMatch(s -> s.equals(word));
|
||||
return getTextBlock().getWords()
|
||||
.stream()
|
||||
.anyMatch(s -> s.equals(word));
|
||||
}
|
||||
|
||||
|
||||
@ -457,7 +491,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsWordIgnoreCase(String word) {
|
||||
|
||||
return getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
|
||||
return getTextBlock().getWords()
|
||||
.stream()
|
||||
.map(String::toLowerCase)
|
||||
.anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
|
||||
}
|
||||
|
||||
|
||||
@ -469,7 +506,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAnyWord(String... words) {
|
||||
|
||||
return Arrays.stream(words).anyMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
|
||||
return Arrays.stream(words)
|
||||
.anyMatch(word -> getTextBlock().getWords()
|
||||
.stream()
|
||||
.anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
@ -481,7 +521,12 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAnyWordIgnoreCase(String... words) {
|
||||
|
||||
return Arrays.stream(words).map(String::toLowerCase).anyMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
|
||||
return Arrays.stream(words)
|
||||
.map(String::toLowerCase)
|
||||
.anyMatch(word -> getTextBlock().getWords()
|
||||
.stream()
|
||||
.map(String::toLowerCase)
|
||||
.anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
@ -493,7 +538,10 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAllWords(String... words) {
|
||||
|
||||
return Arrays.stream(words).allMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
|
||||
return Arrays.stream(words)
|
||||
.allMatch(word -> getTextBlock().getWords()
|
||||
.stream()
|
||||
.anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
@ -505,7 +553,12 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsAllWordsIgnoreCase(String... words) {
|
||||
|
||||
return Arrays.stream(words).map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
|
||||
return Arrays.stream(words)
|
||||
.map(String::toLowerCase)
|
||||
.allMatch(word -> getTextBlock().getWords()
|
||||
.stream()
|
||||
.map(String::toLowerCase)
|
||||
.anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
@ -545,7 +598,11 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean intersectsRectangle(int x, int y, int w, int h, int pageNumber) {
|
||||
|
||||
return getBBox().entrySet().stream().filter(entry -> entry.getKey().getNumber() == pageNumber).map(Map.Entry::getValue).anyMatch(rect -> rect.intersects(x, y, w, h));
|
||||
return getBBox().entrySet()
|
||||
.stream()
|
||||
.filter(entry -> entry.getKey().getNumber() == pageNumber)
|
||||
.map(Map.Entry::getValue)
|
||||
.anyMatch(rect -> rect.intersects(x, y, w, h));
|
||||
}
|
||||
|
||||
|
||||
@ -598,7 +655,8 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Stream<SemanticNode> streamAllSubNodes() {
|
||||
|
||||
return getDocumentTree().allSubEntriesInOrder(getTreeId()).map(DocumentTree.Entry::getNode);
|
||||
return getDocumentTree().allSubEntriesInOrder(getTreeId())
|
||||
.map(DocumentTree.Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
@ -609,7 +667,9 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType) {
|
||||
|
||||
return getDocumentTree().allSubEntriesInOrder(getTreeId()).filter(entry -> entry.getType().equals(nodeType)).map(DocumentTree.Entry::getNode);
|
||||
return getDocumentTree().allSubEntriesInOrder(getTreeId())
|
||||
.filter(entry -> entry.getType().equals(nodeType))
|
||||
.map(DocumentTree.Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
@ -648,7 +708,8 @@ public interface SemanticNode {
|
||||
if (isLeaf()) {
|
||||
return getTextBlock().getPositionsPerPage(textRange);
|
||||
}
|
||||
Optional<SemanticNode> containingChildNode = streamChildren().filter(child -> child.getTextRange().contains(textRange)).findFirst();
|
||||
Optional<SemanticNode> containingChildNode = streamChildren().filter(child -> child.getTextRange().contains(textRange))
|
||||
.findFirst();
|
||||
if (containingChildNode.isEmpty()) {
|
||||
return getTextBlock().getPositionsPerPage(textRange);
|
||||
}
|
||||
@ -698,8 +759,12 @@ public interface SemanticNode {
|
||||
private Map<Page, Rectangle2D> getBBoxFromChildren() {
|
||||
|
||||
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
|
||||
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox).toList();
|
||||
Set<Page> pages = childrenBBoxes.stream().flatMap(map -> map.keySet().stream()).collect(Collectors.toSet());
|
||||
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox)
|
||||
.toList();
|
||||
Set<Page> pages = childrenBBoxes.stream()
|
||||
.flatMap(map -> map.keySet()
|
||||
.stream())
|
||||
.collect(Collectors.toSet());
|
||||
for (Page page : pages) {
|
||||
Rectangle2D bBoxOnPage = childrenBBoxes.stream()
|
||||
.filter(childBboxPerPage -> childBboxPerPage.containsKey(page))
|
||||
@ -717,7 +782,9 @@ public interface SemanticNode {
|
||||
private Map<Page, Rectangle2D> getBBoxFromLeafTextBlock() {
|
||||
|
||||
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
|
||||
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks().stream().collect(Collectors.groupingBy(AtomicTextBlock::getPage));
|
||||
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks()
|
||||
.stream()
|
||||
.collect(Collectors.groupingBy(AtomicTextBlock::getPage));
|
||||
atomicTextBlockPerPage.forEach((page, atomicTextBlocks) -> bBoxPerPage.put(page, RectangleTransformations.atomicTextBlockBBox(atomicTextBlocks)));
|
||||
return bBoxPerPage;
|
||||
}
|
||||
|
||||
@ -79,7 +79,9 @@ public class TableCell implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
return textBlock;
|
||||
}
|
||||
|
||||
@ -61,6 +61,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
return lineBreaks.size() + 1;
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock empty(Long textBlockIdx, int stringOffset, Page page, int numberOnPage, SemanticNode parent) {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
@ -77,10 +78,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData,
|
||||
DocumentPositionData atomicPositionBlockData,
|
||||
SemanticNode parent,
|
||||
Page page) {
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData, DocumentPositionData atomicPositionBlockData, SemanticNode parent, Page page) {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
.id(atomicTextBlockData.getId())
|
||||
@ -88,8 +86,10 @@ public class AtomicTextBlock implements TextBlock {
|
||||
.page(page)
|
||||
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
|
||||
.searchText(atomicTextBlockData.getSearchText())
|
||||
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed().toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed().toList())
|
||||
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed()
|
||||
.toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed()
|
||||
.toList())
|
||||
.positions(toRectangle2DList(atomicPositionBlockData.getPositions()))
|
||||
.parent(parent)
|
||||
.build();
|
||||
@ -98,7 +98,9 @@ public class AtomicTextBlock implements TextBlock {
|
||||
|
||||
private static List<Rectangle2D> toRectangle2DList(float[][] positions) {
|
||||
|
||||
return Arrays.stream(positions).map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3])).toList();
|
||||
return Arrays.stream(positions)
|
||||
.map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3]))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -118,6 +120,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
|
||||
}
|
||||
|
||||
|
||||
public List<String> getWords() {
|
||||
|
||||
if (words == null) {
|
||||
@ -144,9 +147,9 @@ public class AtomicTextBlock implements TextBlock {
|
||||
public int getNextLinebreak(int fromIndex) {
|
||||
|
||||
return lineBreaks.stream()//
|
||||
.filter(linebreak -> linebreak > fromIndex - textRange.start()) //
|
||||
.findFirst() //
|
||||
.orElse(searchText.length()) + textRange.start();
|
||||
.filter(linebreak -> linebreak > fromIndex - textRange.start()) //
|
||||
.findFirst() //
|
||||
.orElse(searchText.length()) + textRange.start();
|
||||
}
|
||||
|
||||
|
||||
@ -154,9 +157,9 @@ public class AtomicTextBlock implements TextBlock {
|
||||
public int getPreviousLinebreak(int fromIndex) {
|
||||
|
||||
return lineBreaks.stream()//
|
||||
.filter(linebreak -> linebreak <= fromIndex - textRange.start())//
|
||||
.reduce((a, b) -> b)//
|
||||
.orElse(0) + textRange.start();
|
||||
.filter(linebreak -> linebreak <= fromIndex - textRange.start())//
|
||||
.reduce((a, b) -> b)//
|
||||
.orElse(0) + textRange.start();
|
||||
}
|
||||
|
||||
|
||||
@ -209,7 +212,10 @@ public class AtomicTextBlock implements TextBlock {
|
||||
return "";
|
||||
}
|
||||
|
||||
Set<Integer> lbInBoundary = lineBreaks.stream().map(i -> i + textRange.start()).filter(textRange::contains).collect(Collectors.toSet());
|
||||
Set<Integer> lbInBoundary = lineBreaks.stream()
|
||||
.map(i -> i + textRange.start())
|
||||
.filter(textRange::contains)
|
||||
.collect(Collectors.toSet());
|
||||
if (textRange.end() == getTextRange().end()) {
|
||||
lbInBoundary.add(getTextRange().end());
|
||||
}
|
||||
@ -235,7 +241,10 @@ public class AtomicTextBlock implements TextBlock {
|
||||
|
||||
private List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
|
||||
|
||||
return getLineBreaks().stream().map(linebreak -> linebreak + this.textRange.start()).filter(textRange::contains).toList();
|
||||
return getLineBreaks().stream()
|
||||
.map(linebreak -> linebreak + this.textRange.start())
|
||||
.filter(textRange::contains)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -44,7 +44,8 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
this.atomicTextBlocks.add(firstTextBlock);
|
||||
textRange = new TextRange(firstTextBlock.getTextRange().start(), firstTextBlock.getTextRange().end());
|
||||
|
||||
atomicTextBlocks.subList(1, atomicTextBlocks.size()).forEach(this::concat);
|
||||
atomicTextBlocks.subList(1, atomicTextBlocks.size())
|
||||
.forEach(this::concat);
|
||||
}
|
||||
|
||||
|
||||
@ -65,7 +66,10 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) {
|
||||
|
||||
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getTextRange().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
|
||||
return atomicTextBlocks.stream()
|
||||
.filter(textBlock -> textBlock.getTextRange().contains(stringIdx))
|
||||
.findAny()
|
||||
.orElseThrow(IndexOutOfBoundsException::new);
|
||||
}
|
||||
|
||||
|
||||
@ -99,14 +103,18 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
@Override
|
||||
public List<String> getWords() {
|
||||
|
||||
return atomicTextBlocks.stream().map(AtomicTextBlock::getWords).flatMap(Collection::stream).toList();
|
||||
return atomicTextBlocks.stream()
|
||||
.map(AtomicTextBlock::getWords)
|
||||
.flatMap(Collection::stream)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int numberOfLines() {
|
||||
|
||||
return atomicTextBlocks.stream().mapToInt(AtomicTextBlock::numberOfLines).sum();
|
||||
return atomicTextBlocks.stream()
|
||||
.mapToInt(AtomicTextBlock::numberOfLines).sum();
|
||||
}
|
||||
|
||||
|
||||
@ -127,7 +135,10 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
@Override
|
||||
public List<Integer> getLineBreaks() {
|
||||
|
||||
return getAtomicTextBlocks().stream().flatMap(atomicTextBlock -> atomicTextBlock.getLineBreaks().stream()).toList();
|
||||
return getAtomicTextBlocks().stream()
|
||||
.flatMap(atomicTextBlock -> atomicTextBlock.getLineBreaks()
|
||||
.stream())
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -202,7 +213,8 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
|
||||
AtomicTextBlock lastTextBlock = textBlocks.get(textBlocks.size() - 1);
|
||||
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage,
|
||||
lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
|
||||
lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(),
|
||||
stringTextRange.end())));
|
||||
|
||||
return rectanglesPerLinePerPage;
|
||||
}
|
||||
@ -239,7 +251,10 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
private Map<Page, List<Rectangle2D>> mergeEntityPositionsWithSamePageNode(Map<Page, List<Rectangle2D>> map1, Map<Page, List<Rectangle2D>> map2) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> mergedMap = new HashMap<>(map1);
|
||||
map2.forEach((pageNode, rectangles) -> mergedMap.merge(pageNode, rectangles, (l1, l2) -> Stream.concat(l1.stream(), l2.stream()).toList()));
|
||||
map2.forEach((pageNode, rectangles) -> mergedMap.merge(pageNode,
|
||||
rectangles,
|
||||
(l1, l2) -> Stream.concat(l1.stream(), l2.stream())
|
||||
.toList()));
|
||||
return mergedMap;
|
||||
}
|
||||
|
||||
|
||||
@ -18,8 +18,10 @@ public interface TextBlock extends CharSequence {
|
||||
|
||||
String getSearchText();
|
||||
|
||||
|
||||
List<String> getWords();
|
||||
|
||||
|
||||
List<AtomicTextBlock> getAtomicTextBlocks();
|
||||
|
||||
|
||||
@ -35,7 +37,6 @@ public interface TextBlock extends CharSequence {
|
||||
TextRange getLineTextRange(int lineNumber);
|
||||
|
||||
|
||||
|
||||
List<Integer> getLineBreaks();
|
||||
|
||||
|
||||
@ -71,6 +72,7 @@ public interface TextBlock extends CharSequence {
|
||||
return RectangleTransformations.rectangle2DBBox(getLinePositions(lineNumber));
|
||||
}
|
||||
|
||||
|
||||
default String searchTextWithLineBreaks() {
|
||||
|
||||
return subSequenceWithLineBreaks(getTextRange());
|
||||
@ -85,7 +87,9 @@ public interface TextBlock extends CharSequence {
|
||||
|
||||
default Set<Page> getPages() {
|
||||
|
||||
return getAtomicTextBlocks().stream().map(AtomicTextBlock::getPage).collect(Collectors.toUnmodifiableSet());
|
||||
return getAtomicTextBlocks().stream()
|
||||
.map(AtomicTextBlock::getPage)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -9,7 +9,8 @@ public record RuleClass(RuleType ruleType, List<RuleUnit> ruleUnits) {
|
||||
public Optional<RuleUnit> findRuleUnitByInteger(Integer unit) {
|
||||
|
||||
return ruleUnits.stream()
|
||||
.filter(ruleUnit -> Objects.equals(ruleUnit.unit(), unit)).findFirst();
|
||||
.filter(ruleUnit -> Objects.equals(ruleUnit.unit(), unit))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -37,13 +37,17 @@ public final class RuleFileBluePrint {
|
||||
|
||||
public Optional<RuleClass> findRuleClassByType(RuleType ruleType) {
|
||||
|
||||
return ruleClasses.stream().filter(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType)).findFirst();
|
||||
return ruleClasses.stream()
|
||||
.filter(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
|
||||
public Set<String> getImportSplitByKeyword() {
|
||||
|
||||
return Arrays.stream(imports.replaceAll("\n", "").split("import")).map(String::trim).collect(Collectors.toSet());
|
||||
return Arrays.stream(imports.replaceAll("\n", "").split("import"))
|
||||
.map(String::trim)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
@ -53,11 +57,15 @@ public final class RuleFileBluePrint {
|
||||
return findRuleClassByType(ruleIdentifier.type()).map(RuleClass::ruleUnits)
|
||||
.orElse(Collections.emptyList())
|
||||
.stream()
|
||||
.flatMap(ruleUnit -> ruleUnit.rules().stream().filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
|
||||
.flatMap(ruleUnit -> ruleUnit.rules()
|
||||
.stream()
|
||||
.filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
|
||||
.toList();
|
||||
}
|
||||
return findRuleClassByType(ruleIdentifier.type()).flatMap(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit()))
|
||||
.map(ruleUnit -> ruleUnit.rules().stream().filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
|
||||
.map(ruleUnit -> ruleUnit.rules()
|
||||
.stream()
|
||||
.filter(rule -> rule.getIdentifier().matches(ruleIdentifier)))
|
||||
.orElse(Stream.empty())
|
||||
.toList();
|
||||
}
|
||||
@ -65,13 +73,18 @@ public final class RuleFileBluePrint {
|
||||
|
||||
public List<RuleIdentifier> getAllRuleIdentifiers() {
|
||||
|
||||
return streamAllRules().map(BasicRule::getIdentifier).collect(Collectors.toList());
|
||||
return streamAllRules().map(BasicRule::getIdentifier)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public Stream<BasicRule> streamAllRules() {
|
||||
|
||||
return getRuleClasses().stream().map(RuleClass::ruleUnits).flatMap(Collection::stream).map(RuleUnit::rules).flatMap(Collection::stream);
|
||||
return getRuleClasses().stream()
|
||||
.map(RuleClass::ruleUnits)
|
||||
.flatMap(Collection::stream)
|
||||
.map(RuleUnit::rules)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -42,8 +42,8 @@ public record RuleIdentifier(@NonNull RuleType type, Integer unit, Integer id) {
|
||||
public boolean matches(RuleIdentifier ruleIdentifier) {
|
||||
|
||||
return ruleIdentifier.type().equals(this.type()) && //
|
||||
(Objects.isNull(ruleIdentifier.unit()) || Objects.isNull(this.unit()) || Objects.equals(this.unit(), ruleIdentifier.unit())) && //
|
||||
(Objects.isNull(ruleIdentifier.id()) || Objects.isNull(this.id()) || Objects.equals(this.id(), ruleIdentifier.id()));
|
||||
(Objects.isNull(ruleIdentifier.unit()) || Objects.isNull(this.unit()) || Objects.equals(this.unit(), ruleIdentifier.unit())) && //
|
||||
(Objects.isNull(ruleIdentifier.id()) || Objects.isNull(this.id()) || Objects.equals(this.id(), ruleIdentifier.id()));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -70,6 +70,7 @@ public class MessagingConfiguration {
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue redactionAnalysisResponseQueue() {
|
||||
|
||||
|
||||
@ -51,14 +51,14 @@ public class RedactionMessageReceiver {
|
||||
// This prevents from endless retries oom errors.
|
||||
if (message.getMessageProperties().isRedelivered()) {
|
||||
var errorMessage = format("Error during last processing of request with dossierId: %s and fileId: %s, do not retry.",
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId());
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId());
|
||||
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
new FileErrorInfo(errorMessage,
|
||||
priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE,
|
||||
"redaction-service",
|
||||
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
|
||||
analyzeRequest.getFileId(),
|
||||
new FileErrorInfo(errorMessage,
|
||||
priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE,
|
||||
"redaction-service",
|
||||
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
|
||||
throw new AmqpRejectAndDontRequeueException(errorMessage);
|
||||
}
|
||||
|
||||
@ -84,9 +84,9 @@ public class RedactionMessageReceiver {
|
||||
log.debug(analyzeRequest.getManualRedactions().toString());
|
||||
result = analyzeService.analyze(analyzeRequest);
|
||||
log.info("Successfully analyzed dossier {} file {} took: {} s",
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
format("%.2f", result.getDuration() / 1000.0));
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
format("%.2f", result.getDuration() / 1000.0));
|
||||
log.info("----------------------------------------------------------------------------------");
|
||||
break;
|
||||
|
||||
@ -96,9 +96,9 @@ public class RedactionMessageReceiver {
|
||||
log.debug(analyzeRequest.getManualRedactions().toString());
|
||||
result = analyzeService.reanalyze(analyzeRequest);
|
||||
log.info("Successfully reanalyzed dossier {} file {} took: {} s",
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
format("%.2f", result.getDuration() / 1000.0));
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
format("%.2f", result.getDuration() / 1000.0));
|
||||
log.info("----------------------------------------------------------------------------------");
|
||||
break;
|
||||
case SURROUNDING_TEXT_ANALYSIS:
|
||||
@ -106,9 +106,7 @@ public class RedactionMessageReceiver {
|
||||
log.info("Starting Surrounding Text Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
log.debug(analyzeRequest.getManualRedactions().toString());
|
||||
unprocessedChangesService.analyseSurroundingText(analyzeRequest);
|
||||
log.info("Successful Surrounding Text Analysis dossier {} file {} ",
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId());
|
||||
log.info("Successful Surrounding Text Analysis dossier {} file {} ", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("-------------------------------------------------------------------------------------------------");
|
||||
shouldRespond = false;
|
||||
break;
|
||||
@ -137,8 +135,8 @@ public class RedactionMessageReceiver {
|
||||
log.warn("Failed to process analyze request: {}", analyzeRequest, e);
|
||||
var timestamp = OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
new FileErrorInfo(e.getMessage(), priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE, "redaction-service", timestamp));
|
||||
analyzeRequest.getFileId(),
|
||||
new FileErrorInfo(e.getMessage(), priority ? REDACTION_PRIORITY_QUEUE : REDACTION_QUEUE, "redaction-service", timestamp));
|
||||
}
|
||||
|
||||
|
||||
@ -153,8 +151,8 @@ public class RedactionMessageReceiver {
|
||||
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||
log.info("Failed to process analyze request, errorCause: {}, timestamp: {}", errorCause, timestamp);
|
||||
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
new FileErrorInfo(errorCause, REDACTION_DQL, "redaction-service", timestamp));
|
||||
analyzeRequest.getFileId(),
|
||||
new FileErrorInfo(errorCause, REDACTION_DQL, "redaction-service", timestamp));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -23,13 +23,15 @@ public class ComponentLogCreatorService {
|
||||
public ComponentLog buildComponentLog(int analysisNumber, List<Component> components, long componentRulesVersion) {
|
||||
|
||||
Map<String, List<ComponentLogEntryValue>> map = new HashMap<>();
|
||||
components.stream().sorted(ComponentComparator.first()).forEach(component -> {
|
||||
ComponentLogEntryValue componentLogEntryValue = buildComponentLogEntry(component);
|
||||
map.computeIfAbsent(component.getName(), k -> new ArrayList<>()).add(componentLogEntryValue);
|
||||
});
|
||||
List<ComponentLogEntry> componentLogComponents = map
|
||||
.entrySet()
|
||||
.stream().map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue()))
|
||||
components.stream()
|
||||
.sorted(ComponentComparator.first())
|
||||
.forEach(component -> {
|
||||
ComponentLogEntryValue componentLogEntryValue = buildComponentLogEntry(component);
|
||||
map.computeIfAbsent(component.getName(), k -> new ArrayList<>()).add(componentLogEntryValue);
|
||||
});
|
||||
List<ComponentLogEntry> componentLogComponents = map.entrySet()
|
||||
.stream()
|
||||
.map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue()))
|
||||
.toList();
|
||||
return new ComponentLog(analysisNumber, componentRulesVersion, componentLogComponents);
|
||||
}
|
||||
@ -38,24 +40,36 @@ public class ComponentLogCreatorService {
|
||||
private ComponentLogEntryValue buildComponentLogEntry(Component component) {
|
||||
|
||||
return ComponentLogEntryValue.builder()
|
||||
.value(component.getValue()).originalValue(component.getValue())
|
||||
.value(component.getValue())
|
||||
.originalValue(component.getValue())
|
||||
.componentRuleId(component.getMatchedRule().toString())
|
||||
.valueDescription(component.getValueDescription())
|
||||
.componentLogEntityReferences(toComponentEntityReferences(component.getReferences().stream().sorted(EntityComparators.first()).toList()))
|
||||
.componentLogEntityReferences(toComponentEntityReferences(component.getReferences()
|
||||
.stream()
|
||||
.sorted(EntityComparators.first())
|
||||
.toList()))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private List<ComponentLogEntityReference> toComponentEntityReferences(List<Entity> references) {
|
||||
|
||||
return references.stream().map(this::toComponentEntityReference).toList();
|
||||
return references.stream()
|
||||
.map(this::toComponentEntityReference)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private ComponentLogEntityReference toComponentEntityReference(Entity entity) {
|
||||
|
||||
return ComponentLogEntityReference.builder().id(entity.getId())
|
||||
.page(entity.getPositions().stream().findFirst().map(Position::getPageNumber).orElse(0)).entityRuleId(entity.getMatchedRule())
|
||||
return ComponentLogEntityReference.builder()
|
||||
.id(entity.getId())
|
||||
.page(entity.getPositions()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.map(Position::getPageNumber)
|
||||
.orElse(0))
|
||||
.entityRuleId(entity.getMatchedRule())
|
||||
.type(entity.getType())
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -56,10 +56,11 @@ public class DictionarySearchService {
|
||||
searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
|
||||
.stream()
|
||||
.filter(boundary -> entityCreationService.isValidEntityTextRange(node.getTextBlock(), boundary))
|
||||
.forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, Set.of(Engine.DICTIONARY)).ifPresent(entity -> {
|
||||
entity.setDictionaryEntry(true);
|
||||
entity.setDossierDictionaryEntry(isDossierDictionaryEntry);
|
||||
}));
|
||||
.forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, Set.of(Engine.DICTIONARY))
|
||||
.ifPresent(entity -> {
|
||||
entity.setDictionaryEntry(true);
|
||||
entity.setDossierDictionaryEntry(isDossierDictionaryEntry);
|
||||
}));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -106,41 +106,47 @@ public class DictionaryService {
|
||||
List<DictionaryModel> dictionaryModels = getDossierTemplateDictionary(dossierTemplateId).getDictionary();
|
||||
|
||||
dictionaryModels.forEach(dictionaryModel -> {
|
||||
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalsePositives().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalseRecommendations().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getEntries()
|
||||
.forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalsePositives()
|
||||
.forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalseRecommendations()
|
||||
.forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (dossierDictionaryExists(dossierId)) {
|
||||
dictionaryModels = getDossierDictionary(dossierId).getDictionary();
|
||||
dictionaryModels.forEach(dictionaryModel -> {
|
||||
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalsePositives().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalseRecommendations().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getEntries()
|
||||
.forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalsePositives()
|
||||
.forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
dictionaryModel.getFalseRecommendations()
|
||||
.forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@ -155,84 +161,120 @@ public class DictionaryService {
|
||||
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
|
||||
|
||||
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId, true) : dictionaryClient.getAllTypesForDossier(dossierId,
|
||||
true);
|
||||
true);
|
||||
if (CollectionUtils.isNotEmpty(typeResponse)) {
|
||||
|
||||
List<DictionaryModel> dictionary = typeResponse.stream().map(t -> {
|
||||
List<DictionaryModel> dictionary = typeResponse.stream()
|
||||
.map(t -> {
|
||||
|
||||
Optional<DictionaryModel> optionalOldModel;
|
||||
if (dossierId == null) {
|
||||
var representation = getDossierTemplateDictionary(dossierTemplateId);
|
||||
optionalOldModel = representation != null ? representation.getDictionary()
|
||||
.stream()
|
||||
.filter(f -> f.getType().equals(t.getType()))
|
||||
.findAny() : Optional.empty();
|
||||
} else {
|
||||
var representation = getDossierDictionary(dossierId);
|
||||
optionalOldModel = representation != null ? representation.getDictionary()
|
||||
.stream()
|
||||
.filter(f -> f.getType().equals(t.getType()))
|
||||
.findAny() : Optional.empty();
|
||||
}
|
||||
Optional<DictionaryModel> optionalOldModel;
|
||||
if (dossierId == null) {
|
||||
var representation = getDossierTemplateDictionary(dossierTemplateId);
|
||||
optionalOldModel = representation != null ? representation.getDictionary()
|
||||
.stream()
|
||||
.filter(f -> f.getType().equals(t.getType()))
|
||||
.findAny() : Optional.empty();
|
||||
} else {
|
||||
var representation = getDossierDictionary(dossierId);
|
||||
optionalOldModel = representation != null ? representation.getDictionary()
|
||||
.stream()
|
||||
.filter(f -> f.getType().equals(t.getType()))
|
||||
.findAny() : Optional.empty();
|
||||
}
|
||||
|
||||
Set<DictionaryEntryModel> entries = new HashSet<>();
|
||||
Set<DictionaryEntryModel> falsePositives = new HashSet<>();
|
||||
Set<DictionaryEntryModel> falseRecommendations = new HashSet<>();
|
||||
Set<DictionaryEntryModel> entries = new HashSet<>();
|
||||
Set<DictionaryEntryModel> falsePositives = new HashSet<>();
|
||||
Set<DictionaryEntryModel> falseRecommendations = new HashSet<>();
|
||||
|
||||
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
|
||||
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
|
||||
|
||||
var newValues = newEntries.getEntries().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
|
||||
optionalOldModel.ifPresent(oldDictionaryModel -> {
|
||||
|
||||
});
|
||||
if (optionalOldModel.isPresent()) {
|
||||
var oldModel = optionalOldModel.get();
|
||||
if (oldModel.isCaseInsensitive() && !t.isCaseInsensitive()) {
|
||||
// add old entries from existing DictionaryModel but exclude lower case representation
|
||||
entries.addAll(oldModel.getEntries().stream().filter(f -> !newValues.stream().map(s -> s.toLowerCase(Locale.ROOT)).toList().contains(f.getValue())).toList());
|
||||
falsePositives.addAll(oldModel.getFalsePositives()
|
||||
var newValues = newEntries.getEntries()
|
||||
.stream()
|
||||
.filter(f -> !newFalsePositivesValues.stream().map(s -> s.toLowerCase(Locale.ROOT)).toList().contains(f.getValue()))
|
||||
.toList());
|
||||
falseRecommendations.addAll(oldModel.getFalseRecommendations()
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toSet());
|
||||
var newFalsePositivesValues = newEntries.getFalsePositives()
|
||||
.stream()
|
||||
.filter(f -> !newFalseRecommendationsValues.stream().map(s -> s.toLowerCase(Locale.ROOT)).toList().contains(f.getValue()))
|
||||
.toList());
|
||||
} else if (!oldModel.isCaseInsensitive() && t.isCaseInsensitive()) {
|
||||
// add old entries from existing DictionaryModel but exclude upper case representation
|
||||
entries.addAll(oldModel.getEntries().stream().filter(f -> !newValues.contains(f.getValue().toLowerCase(Locale.ROOT))).toList());
|
||||
falsePositives.addAll(oldModel.getFalsePositives().stream().filter(f -> !newFalsePositivesValues.contains(f.getValue().toLowerCase(Locale.ROOT))).toList());
|
||||
falseRecommendations.addAll(oldModel.getFalseRecommendations()
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toSet());
|
||||
var newFalseRecommendationsValues = newEntries.getFalseRecommendations()
|
||||
.stream()
|
||||
.filter(f -> !newFalseRecommendationsValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
|
||||
.toList());
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
} else {
|
||||
// add old entries from existing DictionaryModel
|
||||
entries.addAll(oldModel.getEntries().stream().filter(f -> !newValues.contains(f.getValue())).toList());
|
||||
falsePositives.addAll(oldModel.getFalsePositives().stream().filter(f -> !newFalsePositivesValues.contains(f.getValue())).toList());
|
||||
falseRecommendations.addAll(oldModel.getFalseRecommendations().stream().filter(f -> !newFalseRecommendationsValues.contains(f.getValue())).toList());
|
||||
}
|
||||
}
|
||||
optionalOldModel.ifPresent(oldDictionaryModel -> {
|
||||
|
||||
// Add Increments
|
||||
entries.addAll(newEntries.getEntries());
|
||||
falsePositives.addAll(newEntries.getFalsePositives());
|
||||
falseRecommendations.addAll(newEntries.getFalseRecommendations());
|
||||
});
|
||||
if (optionalOldModel.isPresent()) {
|
||||
var oldModel = optionalOldModel.get();
|
||||
if (oldModel.isCaseInsensitive() && !t.isCaseInsensitive()) {
|
||||
// add old entries from existing DictionaryModel but exclude lower case representation
|
||||
entries.addAll(oldModel.getEntries()
|
||||
.stream()
|
||||
.filter(f -> !newValues.stream()
|
||||
.map(s -> s.toLowerCase(Locale.ROOT))
|
||||
.toList().contains(f.getValue()))
|
||||
.toList());
|
||||
falsePositives.addAll(oldModel.getFalsePositives()
|
||||
.stream()
|
||||
.filter(f -> !newFalsePositivesValues.stream()
|
||||
.map(s -> s.toLowerCase(Locale.ROOT))
|
||||
.toList().contains(f.getValue()))
|
||||
.toList());
|
||||
falseRecommendations.addAll(oldModel.getFalseRecommendations()
|
||||
.stream()
|
||||
.filter(f -> !newFalseRecommendationsValues.stream()
|
||||
.map(s -> s.toLowerCase(Locale.ROOT))
|
||||
.toList().contains(f.getValue()))
|
||||
.toList());
|
||||
} else if (!oldModel.isCaseInsensitive() && t.isCaseInsensitive()) {
|
||||
// add old entries from existing DictionaryModel but exclude upper case representation
|
||||
entries.addAll(oldModel.getEntries()
|
||||
.stream()
|
||||
.filter(f -> !newValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
|
||||
.toList());
|
||||
falsePositives.addAll(oldModel.getFalsePositives()
|
||||
.stream()
|
||||
.filter(f -> !newFalsePositivesValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
|
||||
.toList());
|
||||
falseRecommendations.addAll(oldModel.getFalseRecommendations()
|
||||
.stream()
|
||||
.filter(f -> !newFalseRecommendationsValues.contains(f.getValue().toLowerCase(Locale.ROOT)))
|
||||
.toList());
|
||||
|
||||
return new DictionaryModel(t.getType(),
|
||||
t.getRank(),
|
||||
convertColor(t.getHexColor()),
|
||||
t.isCaseInsensitive(),
|
||||
t.isHint(),
|
||||
entries,
|
||||
falsePositives,
|
||||
falseRecommendations,
|
||||
dossierId != null);
|
||||
}).sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList());
|
||||
} else {
|
||||
// add old entries from existing DictionaryModel
|
||||
entries.addAll(oldModel.getEntries()
|
||||
.stream()
|
||||
.filter(f -> !newValues.contains(f.getValue()))
|
||||
.toList());
|
||||
falsePositives.addAll(oldModel.getFalsePositives()
|
||||
.stream()
|
||||
.filter(f -> !newFalsePositivesValues.contains(f.getValue()))
|
||||
.toList());
|
||||
falseRecommendations.addAll(oldModel.getFalseRecommendations()
|
||||
.stream()
|
||||
.filter(f -> !newFalseRecommendationsValues.contains(f.getValue()))
|
||||
.toList());
|
||||
}
|
||||
}
|
||||
|
||||
// Add Increments
|
||||
entries.addAll(newEntries.getEntries());
|
||||
falsePositives.addAll(newEntries.getFalsePositives());
|
||||
falseRecommendations.addAll(newEntries.getFalseRecommendations());
|
||||
|
||||
return new DictionaryModel(t.getType(),
|
||||
t.getRank(),
|
||||
convertColor(t.getHexColor()),
|
||||
t.isCaseInsensitive(),
|
||||
t.isHint(),
|
||||
entries,
|
||||
falsePositives,
|
||||
falseRecommendations,
|
||||
dossierId != null);
|
||||
})
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
|
||||
|
||||
@ -264,17 +306,17 @@ public class DictionaryService {
|
||||
var type = dictionaryClient.getDictionaryForType(typeId, fromVersion);
|
||||
|
||||
Set<DictionaryEntryModel> entries = type.getEntries() != null ? new HashSet<>(type.getEntries()
|
||||
.stream()
|
||||
.map(DictionaryEntryModel::new)
|
||||
.collect(Collectors.toSet())) : new HashSet<>();
|
||||
.stream()
|
||||
.map(DictionaryEntryModel::new)
|
||||
.collect(Collectors.toSet())) : new HashSet<>();
|
||||
Set<DictionaryEntryModel> falsePositives = type.getFalsePositiveEntries() != null ? new HashSet<>(type.getFalsePositiveEntries()
|
||||
.stream()
|
||||
.map(DictionaryEntryModel::new)
|
||||
.collect(Collectors.toSet())) : new HashSet<>();
|
||||
.stream()
|
||||
.map(DictionaryEntryModel::new)
|
||||
.collect(Collectors.toSet())) : new HashSet<>();
|
||||
Set<DictionaryEntryModel> falseRecommendations = type.getFalseRecommendationEntries() != null ? new HashSet<>(type.getFalseRecommendationEntries()
|
||||
.stream()
|
||||
.map(DictionaryEntryModel::new)
|
||||
.collect(Collectors.toSet())) : new HashSet<>();
|
||||
.stream()
|
||||
.map(DictionaryEntryModel::new)
|
||||
.collect(Collectors.toSet())) : new HashSet<>();
|
||||
|
||||
if (type.isCaseInsensitive()) {
|
||||
entries.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
|
||||
@ -282,10 +324,10 @@ public class DictionaryService {
|
||||
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
|
||||
}
|
||||
log.debug("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
|
||||
entries.size(),
|
||||
falsePositives.size(),
|
||||
falseRecommendations.size(),
|
||||
typeId);
|
||||
entries.size(),
|
||||
falsePositives.size(),
|
||||
falseRecommendations.size(),
|
||||
typeId);
|
||||
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
|
||||
}
|
||||
|
||||
@ -300,7 +342,8 @@ public class DictionaryService {
|
||||
@SneakyThrows
|
||||
public float[] getColor(String type, String dossierTemplateId) {
|
||||
|
||||
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap()
|
||||
.get(type);
|
||||
if (model != null) {
|
||||
return model.getColor();
|
||||
}
|
||||
@ -311,7 +354,8 @@ public class DictionaryService {
|
||||
@SneakyThrows
|
||||
public boolean isHint(String type, String dossierTemplateId) {
|
||||
|
||||
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
DictionaryModel model = getDossierTemplateDictionary(dossierTemplateId).getLocalAccessMap()
|
||||
.get(type);
|
||||
if (model != null) {
|
||||
return model.isHint();
|
||||
}
|
||||
@ -335,15 +379,20 @@ public class DictionaryService {
|
||||
var dossierRepresentation = getDossierDictionary(dossierId);
|
||||
var dossierDictionaries = dossierRepresentation.getDictionary();
|
||||
mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries),
|
||||
convertDictionaryModel(dossierDictionaries)));
|
||||
convertDictionaryModel(dossierDictionaries)));
|
||||
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
|
||||
} else {
|
||||
mergedDictionaries = new ArrayList<>();
|
||||
dossierTemplateDictionaries.forEach(dm -> mergedDictionaries.add(SerializationUtils.clone(dm)));
|
||||
}
|
||||
|
||||
return new Dictionary(mergedDictionaries.stream().sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList()),
|
||||
DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
|
||||
return new Dictionary(mergedDictionaries.stream()
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList()),
|
||||
DictionaryVersion.builder()
|
||||
.dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion())
|
||||
.dossierVersion(dossierDictionaryVersion)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
@ -371,14 +420,16 @@ public class DictionaryService {
|
||||
@SneakyThrows
|
||||
private DictionaryRepresentation getDossierTemplateDictionary(String dossierTemplateId) {
|
||||
|
||||
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossierTemplate().get(dossierTemplateId);
|
||||
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossierTemplate()
|
||||
.get(dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private DictionaryRepresentation getDossierDictionary(String dossierId) {
|
||||
|
||||
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossier().get(dossierId);
|
||||
return tenantDictionaryCache.get(TenantContext.getTenantId()).getDictionariesByDossier()
|
||||
.get(dossierId);
|
||||
}
|
||||
|
||||
|
||||
@ -421,14 +472,14 @@ public class DictionaryService {
|
||||
|
||||
return commonsDictionaries.stream()
|
||||
.map(cd -> new DictionaryModel(cd.getType(),
|
||||
cd.getRank(),
|
||||
cd.getColor(),
|
||||
cd.isCaseInsensitive(),
|
||||
cd.isHint(),
|
||||
cd.getEntries(),
|
||||
cd.getFalsePositives(),
|
||||
cd.getFalseRecommendations(),
|
||||
cd.isDossierDictionary()))
|
||||
cd.getRank(),
|
||||
cd.getColor(),
|
||||
cd.isCaseInsensitive(),
|
||||
cd.isHint(),
|
||||
cd.getEntries(),
|
||||
cd.getFalsePositives(),
|
||||
cd.getFalseRecommendations(),
|
||||
cd.isDossierDictionary()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@ -30,7 +30,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class EntityChangeLogService {
|
||||
|
||||
@Timed("redactmanager_computeChanges")
|
||||
public boolean computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, ManualRedactions manualRedactions, int analysisNumber) {
|
||||
public boolean computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber) {
|
||||
|
||||
var now = OffsetDateTime.now();
|
||||
if (previousEntityLogEntries.isEmpty()) {
|
||||
@ -58,16 +58,12 @@ public class EntityChangeLogService {
|
||||
entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now));
|
||||
}
|
||||
}
|
||||
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, manualRedactions, analysisNumber, now);
|
||||
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now);
|
||||
return hasChanges;
|
||||
}
|
||||
|
||||
|
||||
private void addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries,
|
||||
List<EntityLogEntry> newEntityLogEntries,
|
||||
ManualRedactions manualRedactions,
|
||||
int analysisNumber,
|
||||
OffsetDateTime now) {
|
||||
private void addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber, OffsetDateTime now) {
|
||||
|
||||
Set<String> existingIds = newEntityLogEntries.stream()
|
||||
.map(EntityLogEntry::getId)
|
||||
|
||||
@ -70,7 +70,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
List<EntityLogEntry> previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getManualRedactions(), analyzeRequest.getAnalysisNumber());
|
||||
entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber());
|
||||
|
||||
return new EntityLog(redactionServiceSettings.getAnalysisVersion(),
|
||||
analyzeRequest.getAnalysisNumber(),
|
||||
@ -128,10 +128,8 @@ public class EntityLogCreatorService {
|
||||
.collect(Collectors.toList());
|
||||
previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections);
|
||||
|
||||
boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections,
|
||||
newEntityLogEntries,
|
||||
analyzeRequest.getManualRedactions(),
|
||||
analyzeRequest.getAnalysisNumber());
|
||||
boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, newEntityLogEntries, analyzeRequest.getAnalysisNumber());
|
||||
|
||||
previousEntityLog.getEntityLogEntry().addAll(newEntityLogEntries);
|
||||
|
||||
return updateVersionsAndReturnChanges(previousEntityLog, dictionaryVersion, analyzeRequest, hasChanges);
|
||||
|
||||
@ -74,9 +74,9 @@ public class ManualChangesApplicationService {
|
||||
.orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!"));
|
||||
|
||||
positionOnPageToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions()
|
||||
.stream()
|
||||
.map(ManualChangesApplicationService::toRectangle2D)
|
||||
.collect(Collectors.toList()));
|
||||
.stream()
|
||||
.map(ManualChangesApplicationService::toRectangle2D)
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
entityToBeResized.getManualOverwrite().addChange(manualResizeRedaction);
|
||||
|
||||
@ -90,11 +90,17 @@ public class ManualChangesApplicationService {
|
||||
|
||||
if (closestEntity.isPresent()) {
|
||||
copyValuesFromClosestEntity(entityToBeResized, manualResizeRedaction, closestEntity.get());
|
||||
possibleEntities.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
|
||||
possibleEntities.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(TextEntity::removeFromGraph);
|
||||
return;
|
||||
}
|
||||
|
||||
possibleEntities.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
|
||||
possibleEntities.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(TextEntity::removeFromGraph);
|
||||
|
||||
if (node.hasParent()) {
|
||||
node = node.getParent();
|
||||
@ -110,14 +116,18 @@ public class ManualChangesApplicationService {
|
||||
Set<SemanticNode> currentIntersectingNodes = new HashSet<>(entityToBeResized.getIntersectingNodes());
|
||||
Set<SemanticNode> newIntersectingNodes = new HashSet<>(closestEntity.getIntersectingNodes());
|
||||
|
||||
Sets.difference(currentIntersectingNodes, newIntersectingNodes).forEach(removedNode -> removedNode.getEntities().remove(entityToBeResized));
|
||||
Sets.difference(newIntersectingNodes, currentIntersectingNodes).forEach(addedNode -> addedNode.getEntities().add(entityToBeResized));
|
||||
Sets.difference(currentIntersectingNodes, newIntersectingNodes)
|
||||
.forEach(removedNode -> removedNode.getEntities().remove(entityToBeResized));
|
||||
Sets.difference(newIntersectingNodes, currentIntersectingNodes)
|
||||
.forEach(addedNode -> addedNode.getEntities().add(entityToBeResized));
|
||||
|
||||
Set<Page> currentIntersectingPages = new HashSet<>(entityToBeResized.getPages());
|
||||
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
|
||||
|
||||
Sets.difference(currentIntersectingPages, newIntersectingPages).forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
|
||||
Sets.difference(newIntersectingPages, currentIntersectingPages).forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
|
||||
Sets.difference(currentIntersectingPages, newIntersectingPages)
|
||||
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
|
||||
Sets.difference(newIntersectingPages, currentIntersectingPages)
|
||||
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
|
||||
|
||||
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
||||
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));
|
||||
@ -135,7 +145,10 @@ public class ManualChangesApplicationService {
|
||||
if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) {
|
||||
return;
|
||||
}
|
||||
var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList());
|
||||
var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions()
|
||||
.stream()
|
||||
.map(ManualChangesApplicationService::toRectangle2D)
|
||||
.toList());
|
||||
image.setPosition(bBox);
|
||||
image.getManualOverwrite().addChange(manualResizeRedaction);
|
||||
}
|
||||
|
||||
@ -53,10 +53,13 @@ public class NotFoundImportedEntitiesService {
|
||||
if (!notFoundEntities.isEmpty()) {
|
||||
// imported redactions present, intersections must be added with merged imported redactions
|
||||
Map<Integer, List<PrecursorEntity>> importedRedactionsMap = mapImportedRedactionsOnPage(notFoundEntities);
|
||||
entityLog.getEntityLogEntry().stream().filter(entry -> !entry.getEngines().contains(Engine.IMPORTED)).forEach(redactionLogEntry -> {
|
||||
redactionLogEntry.setImportedRedactionIntersections(new HashSet<>());
|
||||
addIntersections(redactionLogEntry, importedRedactionsMap, analysisNumber);
|
||||
});
|
||||
entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> !entry.getEngines().contains(Engine.IMPORTED))
|
||||
.forEach(redactionLogEntry -> {
|
||||
redactionLogEntry.setImportedRedactionIntersections(new HashSet<>());
|
||||
addIntersections(redactionLogEntry, importedRedactionsMap, analysisNumber);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -70,7 +73,10 @@ public class NotFoundImportedEntitiesService {
|
||||
.map(RectangleWithPage::pageNumber)
|
||||
.collect(Collectors.toSet());
|
||||
pageNumbers.forEach(pageNumber -> importedRedactionsMap.put(pageNumber,
|
||||
importedEntities.stream().filter(i -> pageNumber == i.getEntityPosition().get(0).pageNumber()).collect(Collectors.toList())));
|
||||
importedEntities.stream()
|
||||
.filter(i -> pageNumber == i.getEntityPosition()
|
||||
.get(0).pageNumber())
|
||||
.collect(Collectors.toList())));
|
||||
return importedRedactionsMap;
|
||||
}
|
||||
|
||||
|
||||
@ -15,8 +15,12 @@ public class ComponentComparator implements Comparator<Component> {
|
||||
@Override
|
||||
public int compare(Component component1, Component component2) {
|
||||
|
||||
var firstEntity1 = component1.getReferences().stream().min(EntityComparators.first());
|
||||
var firstEntity2 = component2.getReferences().stream().min(EntityComparators.first());
|
||||
var firstEntity1 = component1.getReferences()
|
||||
.stream()
|
||||
.min(EntityComparators.first());
|
||||
var firstEntity2 = component2.getReferences()
|
||||
.stream()
|
||||
.min(EntityComparators.first());
|
||||
if (firstEntity1.isEmpty() && firstEntity2.isEmpty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -40,7 +40,8 @@ public class ComponentCreationService {
|
||||
|
||||
private static List<Entity> findEntitiesFromLongestSection(Collection<Entity> entities) {
|
||||
|
||||
var entitiesBySection = entities.stream().collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
||||
var entitiesBySection = entities.stream()
|
||||
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
||||
Optional<SemanticNode> longestSection = entitiesBySection.entrySet()
|
||||
.stream()
|
||||
.sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed())
|
||||
@ -79,14 +80,20 @@ public class ComponentCreationService {
|
||||
public void firstOrElse(String ruleIdentifier, String name, Collection<Entity> entities, String fallback) {
|
||||
|
||||
String valueDescription = String.format("First found value of type %s or else '%s'", joinTypes(entities), fallback);
|
||||
String value = entities.stream().min(EntityComparators.first()).map(Entity::getValue).orElse(fallback);
|
||||
String value = entities.stream()
|
||||
.min(EntityComparators.first())
|
||||
.map(Entity::getValue)
|
||||
.orElse(fallback);
|
||||
create(ruleIdentifier, name, value, valueDescription, entities);
|
||||
}
|
||||
|
||||
|
||||
private static String joinTypes(Collection<Entity> entities) {
|
||||
|
||||
return entities.stream().map(Entity::getType).distinct().collect(Collectors.joining(", "));
|
||||
return entities.stream()
|
||||
.map(Entity::getType)
|
||||
.distinct()
|
||||
.collect(Collectors.joining(", "));
|
||||
}
|
||||
|
||||
|
||||
@ -104,12 +111,12 @@ public class ComponentCreationService {
|
||||
referencedEntities.addAll(references);
|
||||
|
||||
kieSession.insert(Component.builder()
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(new LinkedList<>(references))
|
||||
.build());
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(new LinkedList<>(references))
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
@ -142,8 +149,11 @@ public class ComponentCreationService {
|
||||
|
||||
private static List<Entity> findEntitiesFromFirstSection(Collection<Entity> entities) {
|
||||
|
||||
var entitiesBySection = entities.stream().collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
||||
Optional<SemanticNode> firstSection = entitiesBySection.keySet().stream().min(SemanticNodeComparators.first());
|
||||
var entitiesBySection = entities.stream()
|
||||
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
||||
Optional<SemanticNode> firstSection = entitiesBySection.keySet()
|
||||
.stream()
|
||||
.min(SemanticNodeComparators.first());
|
||||
if (firstSection.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
@ -188,7 +198,10 @@ public class ComponentCreationService {
|
||||
public void joining(String ruleIdentifier, String name, Collection<Entity> entities, String delimiter) {
|
||||
|
||||
String valueDescription = String.format("Joining all values of type %s with '%s'", joinTypes(entities), delimiter);
|
||||
String value = entities.stream().sorted(EntityComparators.first()).map(Entity::getValue).collect(Collectors.joining(delimiter));
|
||||
String value = entities.stream()
|
||||
.sorted(EntityComparators.first())
|
||||
.map(Entity::getValue)
|
||||
.collect(Collectors.joining(delimiter));
|
||||
create(ruleIdentifier, name, value, valueDescription, entities);
|
||||
}
|
||||
|
||||
@ -231,14 +244,20 @@ public class ComponentCreationService {
|
||||
public void joiningUnique(String ruleIdentifier, String name, Collection<Entity> entities, String delimiter) {
|
||||
|
||||
String valueDescription = String.format("Joining all unique values of type %s with '%s'", joinTypes(entities), delimiter);
|
||||
String value = entities.stream().sorted(EntityComparators.first()).map(Entity::getValue).distinct().collect(Collectors.joining(delimiter));
|
||||
String value = entities.stream()
|
||||
.sorted(EntityComparators.first())
|
||||
.map(Entity::getValue)
|
||||
.distinct()
|
||||
.collect(Collectors.joining(delimiter));
|
||||
create(ruleIdentifier, name, value, valueDescription, entities);
|
||||
}
|
||||
|
||||
|
||||
private static int getTotalLengthOfEntities(Map.Entry<SemanticNode, List<Entity>> entry) {
|
||||
|
||||
return entry.getValue().stream().mapToInt(Entity::getLength).sum();
|
||||
return entry.getValue()
|
||||
.stream()
|
||||
.mapToInt(Entity::getLength).sum();
|
||||
}
|
||||
|
||||
|
||||
@ -293,7 +312,10 @@ public class ComponentCreationService {
|
||||
*/
|
||||
public void uniqueValueCount(String ruleIdentifier, String name, Collection<Entity> entities) {
|
||||
|
||||
long count = entities.stream().map(Entity::getValue).distinct().count();
|
||||
long count = entities.stream()
|
||||
.map(Entity::getValue)
|
||||
.distinct()
|
||||
.count();
|
||||
create(ruleIdentifier, name, String.valueOf(count), "Number of unique values in the entity references", entities);
|
||||
}
|
||||
|
||||
@ -307,18 +329,20 @@ public class ComponentCreationService {
|
||||
*/
|
||||
public void rowValueCount(String ruleIdentifier, String name, Collection<Entity> entities) {
|
||||
|
||||
entities.stream().collect(Collectors.groupingBy(this::getFirstTable)).forEach((optionalTable, groupedEntities) -> {
|
||||
entities.stream()
|
||||
.collect(Collectors.groupingBy(this::getFirstTable))
|
||||
.forEach((optionalTable, groupedEntities) -> {
|
||||
|
||||
if (optionalTable.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
if (optionalTable.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
long count = groupedEntities.stream()
|
||||
.collect(Collectors.groupingBy(entity -> getFirstTableCell(entity).map(TableCell::getRow).orElse(-1)))
|
||||
.size();
|
||||
long count = groupedEntities.stream()
|
||||
.collect(Collectors.groupingBy(entity -> getFirstTableCell(entity).map(TableCell::getRow)
|
||||
.orElse(-1))).size();
|
||||
|
||||
create(ruleIdentifier, name, String.valueOf(count), "Count rows with values in the entity references in same table", entities);
|
||||
});
|
||||
create(ruleIdentifier, name, String.valueOf(count), "Count rows with values in the entity references in same table", entities);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -334,18 +358,20 @@ public class ComponentCreationService {
|
||||
if (entities.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
entities.stream().sorted(EntityComparators.first()).forEach(entity -> {
|
||||
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.ENGLISH);
|
||||
iterator.setText(entity.getValue());
|
||||
int start = iterator.first();
|
||||
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
|
||||
create(ruleIdentifier,
|
||||
name,
|
||||
entity.getValue().substring(start, end).replaceAll("\\n", "").trim(),
|
||||
String.format("Values of type '%s' as sentences", entity.getType()),
|
||||
entity);
|
||||
}
|
||||
});
|
||||
entities.stream()
|
||||
.sorted(EntityComparators.first())
|
||||
.forEach(entity -> {
|
||||
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.ENGLISH);
|
||||
iterator.setText(entity.getValue());
|
||||
int start = iterator.first();
|
||||
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
|
||||
create(ruleIdentifier,
|
||||
name,
|
||||
entity.getValue().substring(start, end).replaceAll("\\n", "").trim(),
|
||||
String.format("Values of type '%s' as sentences", entity.getType()),
|
||||
entity);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -366,12 +392,12 @@ public class ComponentCreationService {
|
||||
List<Entity> referenceList = new LinkedList<>();
|
||||
referenceList.add(reference);
|
||||
kieSession.insert(Component.builder()
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(referenceList)
|
||||
.build());
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(referenceList)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
@ -428,8 +454,10 @@ public class ComponentCreationService {
|
||||
}
|
||||
|
||||
String formattedDateStrings = Stream.concat(//
|
||||
dates.stream().sorted().map(date -> DateConverter.convertDate(date, resultFormat)), //
|
||||
unparsedDates.stream())//
|
||||
dates.stream()
|
||||
.sorted()
|
||||
.map(date -> DateConverter.convertDate(date, resultFormat)), //
|
||||
unparsedDates.stream())//
|
||||
.collect(Collectors.joining(", "));
|
||||
|
||||
create(ruleIdentifier, name, formattedDateStrings, valueDescription, entities);
|
||||
@ -445,26 +473,34 @@ public class ComponentCreationService {
|
||||
*/
|
||||
public void joiningFromSameTableRow(String ruleIdentifier, String name, Collection<Entity> entities) {
|
||||
|
||||
String types = entities.stream().map(Entity::getType).sorted(Comparator.reverseOrder()).distinct().collect(Collectors.joining(", "));
|
||||
String types = entities.stream()
|
||||
.map(Entity::getType)
|
||||
.sorted(Comparator.reverseOrder())
|
||||
.distinct()
|
||||
.collect(Collectors.joining(", "));
|
||||
String valueDescription = String.format("Combine values of %s that are in same table row", types);
|
||||
entities.stream().collect(Collectors.groupingBy(this::getFirstTable)).forEach((optionalTable, groupedEntities) -> {
|
||||
if (optionalTable.isEmpty()) {
|
||||
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
|
||||
}
|
||||
entities.stream()
|
||||
.collect(Collectors.groupingBy(this::getFirstTable))
|
||||
.forEach((optionalTable, groupedEntities) -> {
|
||||
if (optionalTable.isEmpty()) {
|
||||
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
|
||||
}
|
||||
|
||||
groupedEntities.stream()
|
||||
.filter(entity -> entity.getContainingNode() instanceof TableCell)
|
||||
.collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow()))
|
||||
.entrySet()
|
||||
.stream()
|
||||
.sorted(Comparator.comparingInt(Map.Entry::getKey))
|
||||
.map(Map.Entry::getValue)
|
||||
.forEach(entitiesInSameRow -> create(ruleIdentifier,
|
||||
name,
|
||||
entitiesInSameRow.stream().sorted(EntityComparators.first()).map(Entity::getValue).collect(Collectors.joining(", ")),
|
||||
valueDescription,
|
||||
entitiesInSameRow));
|
||||
});
|
||||
groupedEntities.stream()
|
||||
.filter(entity -> entity.getContainingNode() instanceof TableCell)
|
||||
.collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow())).entrySet()
|
||||
.stream()
|
||||
.sorted(Comparator.comparingInt(Map.Entry::getKey))
|
||||
.map(Map.Entry::getValue)
|
||||
.forEach(entitiesInSameRow -> create(ruleIdentifier,
|
||||
name,
|
||||
entitiesInSameRow.stream()
|
||||
.sorted(EntityComparators.first())
|
||||
.map(Entity::getValue)
|
||||
.collect(Collectors.joining(", ")),
|
||||
valueDescription,
|
||||
entitiesInSameRow));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -521,12 +557,12 @@ public class ComponentCreationService {
|
||||
public void create(String ruleIdentifier, String name, String value) {
|
||||
|
||||
kieSession.insert(Component.builder()
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription("")
|
||||
.references(Collections.emptyList())
|
||||
.build());
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription("")
|
||||
.references(Collections.emptyList())
|
||||
.build());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -40,7 +40,9 @@ public class DocumentGraphMapper {
|
||||
DocumentTree documentTree = new DocumentTree(document);
|
||||
Context context = new Context(documentData, documentTree);
|
||||
|
||||
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
|
||||
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages())
|
||||
.map(DocumentGraphMapper::buildPage)
|
||||
.toList());
|
||||
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
|
||||
|
||||
@ -58,7 +60,9 @@ public class DocumentGraphMapper {
|
||||
List<DocumentTree.Entry> newEntries = new LinkedList<>();
|
||||
for (DocumentStructure.EntryData entryData : entries) {
|
||||
|
||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
|
||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
|
||||
.map(pageNumber -> getPage(pageNumber, context))
|
||||
.toList();
|
||||
|
||||
SemanticNode node = switch (entryData.getType()) {
|
||||
case SECTION -> buildSection(context);
|
||||
@ -76,8 +80,10 @@ public class DocumentGraphMapper {
|
||||
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
|
||||
node.setLeafTextBlock(textBlock);
|
||||
}
|
||||
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed().toList();
|
||||
entryData.getEngines().forEach(engine -> node.addEngine(engine));
|
||||
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
|
||||
.toList();
|
||||
entryData.getEngines()
|
||||
.forEach(engine -> node.addEngine(engine));
|
||||
node.setTreeId(treeId);
|
||||
|
||||
switch (entryData.getType()) {
|
||||
@ -150,16 +156,18 @@ public class DocumentGraphMapper {
|
||||
|
||||
private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) {
|
||||
|
||||
return Arrays.stream(atomicTextBlockIds).map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId)).collect(new TextBlockCollector());
|
||||
return Arrays.stream(atomicTextBlockIds)
|
||||
.map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId))
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
|
||||
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
}
|
||||
|
||||
|
||||
@ -190,8 +198,10 @@ public class DocumentGraphMapper {
|
||||
|
||||
this.documentTree = documentTree;
|
||||
this.pageData = new LinkedList<>();
|
||||
this.documentTextData = Arrays.stream(documentData.getDocumentTextData()).toList();
|
||||
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData()).toList();
|
||||
this.documentTextData = Arrays.stream(documentData.getDocumentTextData())
|
||||
.toList();
|
||||
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData())
|
||||
.toList();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -11,6 +11,7 @@ public abstract class EntityComparators implements Comparator<Entity> {
|
||||
return new FirstEntity();
|
||||
}
|
||||
|
||||
|
||||
public static class LongestEntity implements Comparator<Entity> {
|
||||
|
||||
@Override
|
||||
@ -27,6 +28,7 @@ public abstract class EntityComparators implements Comparator<Entity> {
|
||||
return new LongestEntity();
|
||||
}
|
||||
|
||||
|
||||
public static class FirstEntity implements Comparator<Entity> {
|
||||
|
||||
@Override
|
||||
|
||||
@ -276,7 +276,8 @@ public class EntityCreationService {
|
||||
"this is some text. a here is more text" and "here is more text". We only want to keep the latter.
|
||||
*/
|
||||
return entityTextRanges.stream()
|
||||
.filter(boundary -> entityTextRanges.stream().noneMatch(innerBoundary -> !innerBoundary.equals(boundary) && innerBoundary.containedBy(boundary)))
|
||||
.filter(boundary -> entityTextRanges.stream()
|
||||
.noneMatch(innerBoundary -> !innerBoundary.equals(boundary) && innerBoundary.containedBy(boundary)))
|
||||
.toList();
|
||||
|
||||
}
|
||||
@ -351,10 +352,10 @@ public class EntityCreationService {
|
||||
|
||||
return tableNode.streamTableCells()
|
||||
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findTextRangesByString(string, tableCell.getTextBlock()),
|
||||
tableCell,
|
||||
type,
|
||||
entityType,
|
||||
tableNode));
|
||||
tableCell,
|
||||
type,
|
||||
entityType,
|
||||
tableNode));
|
||||
}
|
||||
|
||||
|
||||
@ -362,10 +363,10 @@ public class EntityCreationService {
|
||||
|
||||
return tableNode.streamTableCells()
|
||||
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, tableCell.getTextBlock()),
|
||||
tableCell,
|
||||
type,
|
||||
entityType,
|
||||
tableNode));
|
||||
tableCell,
|
||||
type,
|
||||
entityType,
|
||||
tableNode));
|
||||
}
|
||||
|
||||
|
||||
@ -500,7 +501,10 @@ public class EntityCreationService {
|
||||
|
||||
public Stream<TextEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get);
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
.map(semanticNode -> bySemanticNode(semanticNode, type, entityType))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -592,7 +596,11 @@ public class EntityCreationService {
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node);
|
||||
if (node.getEntities().contains(entity)) {
|
||||
Optional<TextEntity> optionalTextEntity = node.getEntities().stream().filter(e -> e.equals(entity) && e.type().equals(type)).peek(e -> e.addEngines(engines)).findAny();
|
||||
Optional<TextEntity> optionalTextEntity = node.getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.equals(entity) && e.type().equals(type))
|
||||
.peek(e -> e.addEngines(engines))
|
||||
.findAny();
|
||||
if (optionalTextEntity.isEmpty()) {
|
||||
return optionalTextEntity; // Entity has been recategorized and should not be created at all.
|
||||
}
|
||||
@ -647,17 +655,27 @@ public class EntityCreationService {
|
||||
return entitiesToMerge.get(0);
|
||||
}
|
||||
|
||||
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream().map(TextEntity::getTextRange).toList()), type, entityType, node);
|
||||
mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet()));
|
||||
entitiesToMerge.stream().map(TextEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
|
||||
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream()
|
||||
.map(TextEntity::getTextRange)
|
||||
.toList()), type, entityType, node);
|
||||
mergedEntity.addEngines(entitiesToMerge.stream()
|
||||
.flatMap(entityNode -> entityNode.getEngines()
|
||||
.stream())
|
||||
.collect(Collectors.toSet()));
|
||||
entitiesToMerge.stream()
|
||||
.map(TextEntity::getMatchedRuleList)
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
|
||||
entitiesToMerge.stream()
|
||||
.map(TextEntity::getManualOverwrite)
|
||||
.map(ManualChangeOverwrite::getManualChangeLog)
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(manualChange -> mergedEntity.getManualOverwrite().addChange(manualChange));
|
||||
|
||||
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDictionaryEntry));
|
||||
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDossierDictionaryEntry));
|
||||
mergedEntity.setDictionaryEntry(entitiesToMerge.stream()
|
||||
.anyMatch(TextEntity::isDictionaryEntry));
|
||||
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream()
|
||||
.anyMatch(TextEntity::isDossierDictionaryEntry));
|
||||
|
||||
addEntityToGraph(mergedEntity, node);
|
||||
insertToKieSession(mergedEntity);
|
||||
@ -667,7 +685,8 @@ public class EntityCreationService {
|
||||
|
||||
public Stream<TextEntity> copyEntities(List<TextEntity> entities, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return entities.stream().map(entity -> copyEntity(entity, type, entityType, node));
|
||||
return entities.stream()
|
||||
.map(entity -> copyEntity(entity, type, entityType, node));
|
||||
}
|
||||
|
||||
|
||||
@ -744,7 +763,8 @@ public class EntityCreationService {
|
||||
try {
|
||||
if (node.getEntities().contains(entity)) {
|
||||
// If entity already exists and it has a different text range, we add the text range to the list of duplicated text ranges
|
||||
node.getEntities().stream()//
|
||||
node.getEntities()
|
||||
.stream()//
|
||||
.filter(e -> e.equals(entity))//
|
||||
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
|
||||
.findAny()//
|
||||
@ -770,8 +790,10 @@ public class EntityCreationService {
|
||||
SemanticNode deepestSharedNode = entityToDuplicate.getIntersectingNodes()
|
||||
.stream()
|
||||
.sorted(Comparator.comparingInt(n -> -n.getTreeId().size()))
|
||||
.filter(intersectingNode -> entityToDuplicate.getDuplicateTextRanges().stream().allMatch(tr -> intersectingNode.getTextRange().contains(tr)) && //
|
||||
intersectingNode.getTextRange().contains(entityToDuplicate.getTextRange()))
|
||||
.filter(intersectingNode -> entityToDuplicate.getDuplicateTextRanges()
|
||||
.stream()
|
||||
.allMatch(tr -> intersectingNode.getTextRange().contains(tr)) && //
|
||||
intersectingNode.getTextRange().contains(entityToDuplicate.getTextRange()))
|
||||
.findFirst()
|
||||
.orElse(node.getDocumentTree().getRoot().getNode());
|
||||
|
||||
@ -784,7 +806,8 @@ public class EntityCreationService {
|
||||
return;
|
||||
}
|
||||
additionalIntersectingNode.getEntities().add(entityToDuplicate);
|
||||
additionalIntersectingNode.getPages(newTextRange).forEach(page -> page.getEntities().add(entityToDuplicate));
|
||||
additionalIntersectingNode.getPages(newTextRange)
|
||||
.forEach(page -> page.getEntities().add(entityToDuplicate));
|
||||
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
|
||||
});
|
||||
}
|
||||
@ -806,5 +829,4 @@ public class EntityCreationService {
|
||||
addEntityToNodeEntitySets(entity);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -11,7 +11,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBl
|
||||
|
||||
public class EntityCreationUtility {
|
||||
|
||||
|
||||
public static void checkIfBothStartAndEndAreEmpty(String start, String end) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(List.of(start), List.of(end));
|
||||
@ -57,7 +56,8 @@ public class EntityCreationUtility {
|
||||
|
||||
public static void addEntityToNodeEntitySets(TextEntity entity) {
|
||||
|
||||
entity.getIntersectingNodes().forEach(node -> node.getEntities().add(entity));
|
||||
entity.getIntersectingNodes()
|
||||
.forEach(node -> node.getEntities().add(entity));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -59,7 +59,9 @@ public class EntityEnrichmentService {
|
||||
|
||||
private static List<String> splitToWordsAndRemoveEmptyWords(String textAfter) {
|
||||
|
||||
return Arrays.stream(textAfter.split(" ")).filter(word -> !Objects.equals("", word)).toList();
|
||||
return Arrays.stream(textAfter.split(" "))
|
||||
.filter(word -> !Objects.equals("", word))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -47,7 +47,9 @@ public class EntityFindingUtility {
|
||||
}
|
||||
|
||||
|
||||
public Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity, Map<String, List<TextEntity>> entitiesWithSameValue, double matchThreshold) {
|
||||
public Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity,
|
||||
Map<String, List<TextEntity>> entitiesWithSameValue,
|
||||
double matchThreshold) {
|
||||
|
||||
if (precursorEntity.getValue() == null) {
|
||||
return Optional.empty();
|
||||
@ -73,11 +75,15 @@ public class EntityFindingUtility {
|
||||
ClosestEntity closestEntity = optionalClosestEntity.get();
|
||||
if (closestEntity.getDistance() > matchThreshold) {
|
||||
log.warn("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}",
|
||||
precursorEntity.getValue(),
|
||||
precursorEntity.getEntityPosition().get(0).pageNumber(),
|
||||
precursorEntity.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(),
|
||||
closestEntity.getDistance(),
|
||||
matchThreshold);
|
||||
precursorEntity.getValue(),
|
||||
precursorEntity.getEntityPosition()
|
||||
.get(0).pageNumber(),
|
||||
precursorEntity.getEntityPosition()
|
||||
.stream()
|
||||
.map(RectangleWithPage::rectangle2D)
|
||||
.toList(),
|
||||
closestEntity.getDistance(),
|
||||
matchThreshold);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
@ -93,8 +99,14 @@ public class EntityFindingUtility {
|
||||
|
||||
private static boolean pagesMatch(TextEntity entity, List<RectangleWithPage> originalPositions) {
|
||||
|
||||
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet());
|
||||
Set<Integer> originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet());
|
||||
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.map(PositionOnPage::getPage)
|
||||
.map(Page::getNumber)
|
||||
.collect(Collectors.toSet());
|
||||
Set<Integer> originalPageNumbers = originalPositions.stream()
|
||||
.map(RectangleWithPage::pageNumber)
|
||||
.collect(Collectors.toSet());
|
||||
return entityPageNumbers.containsAll(originalPageNumbers);
|
||||
}
|
||||
|
||||
@ -105,15 +117,16 @@ public class EntityFindingUtility {
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
return originalPositions.stream()
|
||||
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D()))
|
||||
.average()
|
||||
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average()
|
||||
.orElse(Double.MAX_VALUE);
|
||||
}
|
||||
|
||||
|
||||
private static long countRectangles(TextEntity entity) {
|
||||
|
||||
return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
|
||||
return entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
|
||||
}
|
||||
|
||||
|
||||
@ -144,22 +157,33 @@ public class EntityFindingUtility {
|
||||
double maxY2 = Math.max(rectangle2.getMinY(), rectangle2.getMaxY());
|
||||
|
||||
return Math.abs(minX1 - minX2) //
|
||||
+ Math.abs(minY1 - minY2) //
|
||||
+ Math.abs(maxX1 - maxX2) //
|
||||
+ Math.abs(maxY1 - maxY2);
|
||||
+ Math.abs(minY1 - minY2) //
|
||||
+ Math.abs(maxX1 - maxX2) //
|
||||
+ Math.abs(maxY1 - maxY2);
|
||||
}
|
||||
|
||||
|
||||
public Map<String, List<TextEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, List<PrecursorEntity> manualEntities) {
|
||||
|
||||
Set<Integer> pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
|
||||
Set<String> entryValues = manualEntities.stream().map(PrecursorEntity::getValue).filter(Objects::nonNull).map(String::toLowerCase).collect(Collectors.toSet());
|
||||
Set<Integer> pageNumbers = manualEntities.stream()
|
||||
.flatMap(entry -> entry.getEntityPosition()
|
||||
.stream()
|
||||
.map(RectangleWithPage::pageNumber))
|
||||
.collect(Collectors.toSet());
|
||||
Set<String> entryValues = manualEntities.stream()
|
||||
.map(PrecursorEntity::getValue)
|
||||
.filter(Objects::nonNull)
|
||||
.map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (!pageNumbers.stream().allMatch(node::onPage)) {
|
||||
if (!pageNumbers.stream()
|
||||
.allMatch(node::onPage)) {
|
||||
throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
|
||||
node,
|
||||
pageNumbers.stream().filter(pageNumber -> !node.onPage(pageNumber)).toList(),
|
||||
node.getPages()));
|
||||
node,
|
||||
pageNumbers.stream()
|
||||
.filter(pageNumber -> !node.onPage(pageNumber))
|
||||
.toList(),
|
||||
node.getPages()));
|
||||
}
|
||||
SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);
|
||||
|
||||
|
||||
@ -52,9 +52,13 @@ public class ImportedRedactionEntryService {
|
||||
private List<BaseAnnotation> allManualChangesExceptAdd(ManualRedactions manualRedactions) {
|
||||
|
||||
return Stream.of(manualRedactions.getForceRedactions(),
|
||||
manualRedactions.getResizeRedactions(),
|
||||
manualRedactions.getRecategorizations(),
|
||||
manualRedactions.getIdsToRemove(),
|
||||
manualRedactions.getLegalBasisChanges()).flatMap(Collection::stream).map(baseAnnotation -> (BaseAnnotation) baseAnnotation).toList();
|
||||
manualRedactions.getResizeRedactions(),
|
||||
manualRedactions.getRecategorizations(),
|
||||
manualRedactions.getIdsToRemove(),
|
||||
manualRedactions.getLegalBasisChanges())
|
||||
.flatMap(Collection::stream)
|
||||
.map(baseAnnotation -> (BaseAnnotation) baseAnnotation)
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -14,12 +14,14 @@ public class IntersectingNodeVisitor implements NodeVisitor {
|
||||
private Set<SemanticNode> intersectingNodes;
|
||||
private final TextRange textRange;
|
||||
|
||||
|
||||
public IntersectingNodeVisitor(TextRange textRange) {
|
||||
|
||||
this.textRange = textRange;
|
||||
this.intersectingNodes = new HashSet<>();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(SemanticNode node) {
|
||||
|
||||
|
||||
@ -31,7 +31,8 @@ public class ManualRedactionEntryService {
|
||||
List<PrecursorEntity> notFoundManualRedactionEntries = Collections.emptyList();
|
||||
if (analyzeRequest.getManualRedactions() != null) {
|
||||
notFoundManualRedactionEntries = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(analyzeRequest.getManualRedactions(),
|
||||
document, dossierTemplateId);
|
||||
document,
|
||||
dossierTemplateId);
|
||||
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
}
|
||||
|
||||
@ -51,10 +52,13 @@ public class ManualRedactionEntryService {
|
||||
private List<BaseAnnotation> allManualChangesExceptAdd(ManualRedactions manualRedactions) {
|
||||
|
||||
return Stream.of(manualRedactions.getForceRedactions(),
|
||||
manualRedactions.getResizeRedactions(),
|
||||
manualRedactions.getRecategorizations(),
|
||||
manualRedactions.getIdsToRemove(),
|
||||
manualRedactions.getLegalBasisChanges()).flatMap(Collection::stream).map(baseAnnotation -> (BaseAnnotation) baseAnnotation).toList();
|
||||
manualRedactions.getResizeRedactions(),
|
||||
manualRedactions.getRecategorizations(),
|
||||
manualRedactions.getIdsToRemove(),
|
||||
manualRedactions.getLegalBasisChanges())
|
||||
.flatMap(Collection::stream)
|
||||
.map(baseAnnotation -> (BaseAnnotation) baseAnnotation)
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -44,9 +44,11 @@ public class NerEntitiesAdapter {
|
||||
public NerEntities toNerEntities(NerEntitiesModel nerEntitiesModel, Document document) {
|
||||
|
||||
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSections(document),
|
||||
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
|
||||
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
|
||||
nerEntityModel.getType())).toList());
|
||||
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
|
||||
new TextRange(nerEntityModel.getStartOffset(),
|
||||
nerEntityModel.getEndOffset()),
|
||||
nerEntityModel.getType()))
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
@ -83,7 +85,9 @@ public class NerEntitiesAdapter {
|
||||
|
||||
List<List<NerEntities.NerEntity>> entityClusters = new LinkedList<>();
|
||||
|
||||
List<NerEntities.NerEntity> startEntitiesOfEssentialType = sortedEntities.stream().filter(e -> essentialTypes.contains(e.type())).toList();
|
||||
List<NerEntities.NerEntity> startEntitiesOfEssentialType = sortedEntities.stream()
|
||||
.filter(e -> essentialTypes.contains(e.type()))
|
||||
.toList();
|
||||
for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) {
|
||||
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
|
||||
entityClusters.add(currentCluster);
|
||||
@ -105,7 +109,10 @@ public class NerEntitiesAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
return entityClusters.stream().filter(cluster -> cluster.size() >= minPartsToCombine).map(NerEntitiesAdapter::toContainingBoundary).distinct();
|
||||
return entityClusters.stream()
|
||||
.filter(cluster -> cluster.size() >= minPartsToCombine)
|
||||
.map(NerEntitiesAdapter::toContainingBoundary)
|
||||
.distinct();
|
||||
}
|
||||
|
||||
|
||||
@ -124,17 +131,18 @@ public class NerEntitiesAdapter {
|
||||
public Stream<TextRange> combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) {
|
||||
|
||||
return combineNerEntities(entityRecognitionEntities,
|
||||
CBI_ADDRESS_ESSENTIAL_TYPES,
|
||||
CBI_ADDRESS_TYPES_TO_COMBINE,
|
||||
MAX_DISTANCE_BETWEEN_PARTS,
|
||||
MIN_PARTS_TO_COMBINE,
|
||||
ALLOW_DUPLICATES);
|
||||
CBI_ADDRESS_ESSENTIAL_TYPES,
|
||||
CBI_ADDRESS_TYPES_TO_COMBINE,
|
||||
MAX_DISTANCE_BETWEEN_PARTS,
|
||||
MIN_PARTS_TO_COMBINE,
|
||||
ALLOW_DUPLICATES);
|
||||
}
|
||||
|
||||
|
||||
private static boolean isDuplicate(List<NerEntities.NerEntity> currentCluster, NerEntities.NerEntity entity, boolean allowDuplicates) {
|
||||
|
||||
return allowDuplicates || currentCluster.stream().anyMatch(e -> e.type().equals(entity.type()));
|
||||
return allowDuplicates || currentCluster.stream()
|
||||
.anyMatch(e -> e.type().equals(entity.type()));
|
||||
}
|
||||
|
||||
|
||||
@ -146,24 +154,34 @@ public class NerEntitiesAdapter {
|
||||
|
||||
private static TextRange toContainingBoundary(List<NerEntities.NerEntity> nerEntities) {
|
||||
|
||||
return TextRange.merge(nerEntities.stream().map(NerEntities.NerEntity::textRange).toList());
|
||||
return TextRange.merge(nerEntities.stream()
|
||||
.map(NerEntities.NerEntity::textRange)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Stream<EntityRecognitionEntity> addOffsetsAndFlatten(List<Integer> stringOffsetsForMainSections, NerEntitiesModel nerEntitiesModel) {
|
||||
|
||||
nerEntitiesModel.getData().forEach((sectionNumber, listOfNerEntities) -> listOfNerEntities.forEach(entityRecognitionEntity -> {
|
||||
int newStartOffset = entityRecognitionEntity.getStartOffset() + stringOffsetsForMainSections.get(sectionNumber);
|
||||
entityRecognitionEntity.setStartOffset(newStartOffset);
|
||||
entityRecognitionEntity.setEndOffset(newStartOffset + entityRecognitionEntity.getValue().length());
|
||||
}));
|
||||
return nerEntitiesModel.getData().values().stream().flatMap(Collection::stream);
|
||||
nerEntitiesModel.getData()
|
||||
.forEach((sectionNumber, listOfNerEntities) -> listOfNerEntities.forEach(entityRecognitionEntity -> {
|
||||
int newStartOffset = entityRecognitionEntity.getStartOffset() + stringOffsetsForMainSections.get(sectionNumber);
|
||||
entityRecognitionEntity.setStartOffset(newStartOffset);
|
||||
entityRecognitionEntity.setEndOffset(newStartOffset + entityRecognitionEntity.getValue().length());
|
||||
}));
|
||||
return nerEntitiesModel.getData().values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
|
||||
private static List<Integer> getStringStartOffsetsForMainSections(Document document) {
|
||||
|
||||
return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getTextRange).map(TextRange::start).toList();
|
||||
return document.getMainSections()
|
||||
.stream()
|
||||
.map(Section::getTextBlock)
|
||||
.map(TextBlock::getTextRange)
|
||||
.map(TextRange::start)
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,4 +5,5 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
|
||||
public interface NodeVisitor {
|
||||
|
||||
void visit(SemanticNode node);
|
||||
|
||||
}
|
||||
|
||||
@ -43,7 +43,9 @@ public class PropertiesMapper {
|
||||
|
||||
private Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(DocumentStructure.RECTANGLE_DELIMITER)).map(Float::parseFloat).toList();
|
||||
List<Float> floats = Arrays.stream(bBox.split(DocumentStructure.RECTANGLE_DELIMITER))
|
||||
.map(Float::parseFloat)
|
||||
.toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
}
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ public abstract class SemanticNodeComparators implements Comparator<SemanticNode
|
||||
return new FirstSemanticNode();
|
||||
}
|
||||
|
||||
|
||||
public static class FirstSemanticNode extends SemanticNodeComparators {
|
||||
|
||||
@Override
|
||||
|
||||
@ -50,7 +50,9 @@ public class ComponentDroolsExecutionService {
|
||||
.filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED))
|
||||
.map(entry -> Entity.fromEntityLogEntry(entry, document))
|
||||
.forEach(kieSession::insert);
|
||||
fileAttributes.stream().filter(f -> f.getValue() != null).forEach(kieSession::insert);
|
||||
fileAttributes.stream()
|
||||
.filter(f -> f.getValue() != null)
|
||||
.forEach(kieSession::insert);
|
||||
|
||||
CompletableFuture<Void> completableFuture = CompletableFuture.supplyAsync(() -> {
|
||||
kieSession.fireAllRules();
|
||||
@ -58,7 +60,8 @@ public class ComponentDroolsExecutionService {
|
||||
});
|
||||
|
||||
try {
|
||||
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS).get();
|
||||
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS)
|
||||
.get();
|
||||
} catch (ExecutionException e) {
|
||||
kieSession.dispose();
|
||||
if (e.getCause() instanceof TimeoutException) {
|
||||
@ -71,7 +74,9 @@ public class ComponentDroolsExecutionService {
|
||||
}
|
||||
|
||||
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
|
||||
List<Component> components = getComponents(kieSession).stream().sorted(ComponentComparator.first()).toList();
|
||||
List<Component> components = getComponents(kieSession).stream()
|
||||
.sorted(ComponentComparator.first())
|
||||
.toList();
|
||||
kieSession.dispose();
|
||||
return components;
|
||||
}
|
||||
|
||||
@ -60,6 +60,7 @@ public class DroolsSyntaxValidationService {
|
||||
return droolsCompilerSyntaxValidation;
|
||||
}
|
||||
|
||||
|
||||
private DroolsSyntaxDeprecatedWarnings getWarningsForDeprecatedImports(RuleFileBluePrint ruleFileBluePrint) {
|
||||
|
||||
if (!deprecatedElementsFinder.getDeprecatedClasses().isEmpty()) {
|
||||
@ -70,13 +71,13 @@ public class DroolsSyntaxValidationService {
|
||||
String sb = "Following imports are deprecated: \n" + matches.stream()
|
||||
.map(m -> imports.substring(m.startIndex(), m.endIndex()))
|
||||
.collect(Collectors.joining("\n"));
|
||||
return DroolsSyntaxDeprecatedWarnings.builder().line(ruleFileBluePrint.getImportLine()).column(0).message(sb)
|
||||
.build();
|
||||
return DroolsSyntaxDeprecatedWarnings.builder().line(ruleFileBluePrint.getImportLine()).column(0).message(sb).build();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
private List<DroolsSyntaxDeprecatedWarnings> getWarningsForDeprecatedRules(RuleFileBluePrint ruleFileBluePrint) {
|
||||
|
||||
List<DroolsSyntaxDeprecatedWarnings> warningMessages = new ArrayList<>();
|
||||
@ -96,8 +97,7 @@ public class DroolsSyntaxValidationService {
|
||||
.distinct()
|
||||
.map(dm -> String.format("Method %s might be deprecated because of \n %s \n", dm, deprecatedMethodsSignatureMap.get(dm)))
|
||||
.collect(Collectors.joining("\n"));
|
||||
warningMessages.add(DroolsSyntaxDeprecatedWarnings.builder().line(basicRule.getLine()).column(0).message(warningMessage)
|
||||
.build());
|
||||
warningMessages.add(DroolsSyntaxDeprecatedWarnings.builder().line(basicRule.getLine()).column(0).message(warningMessage).build());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -160,7 +160,10 @@ public class DroolsSyntaxValidationService {
|
||||
String requiredAgendaGroup = "LOCAL_DICTIONARY_ADDS";
|
||||
if (!validateAgendaGroupIsPresent(ruleFileBluePrint, requiredAgendaGroup)) {
|
||||
customSyntaxValidation.getDroolsSyntaxErrorMessages()
|
||||
.add(DroolsSyntaxErrorMessage.builder().line(0).column(0).message(String.format("At least one rule with Agenda-Group '%s' required!", requiredAgendaGroup))
|
||||
.add(DroolsSyntaxErrorMessage.builder()
|
||||
.line(0)
|
||||
.column(0)
|
||||
.message(String.format("At least one rule with Agenda-Group '%s' required!", requiredAgendaGroup))
|
||||
.build());
|
||||
}
|
||||
}
|
||||
@ -214,15 +217,13 @@ public class DroolsSyntaxValidationService {
|
||||
List<DroolsSyntaxErrorMessage> droolsSyntaxErrorMessages = errorMessages.stream()
|
||||
.map(this::buildDroolsSyntaxErrorMessage)
|
||||
.collect(Collectors.toList());
|
||||
return DroolsSyntaxValidation.builder().droolsSyntaxErrorMessages(droolsSyntaxErrorMessages)
|
||||
.build();
|
||||
return DroolsSyntaxValidation.builder().droolsSyntaxErrorMessages(droolsSyntaxErrorMessages).build();
|
||||
}
|
||||
|
||||
|
||||
private DroolsSyntaxErrorMessage buildDroolsSyntaxErrorMessage(Message message) {
|
||||
|
||||
return DroolsSyntaxErrorMessage.builder().line(message.getLine()).column(message.getColumn()).message(message.getText())
|
||||
.build();
|
||||
return DroolsSyntaxErrorMessage.builder().line(message.getLine()).column(message.getColumn()).message(message.getText()).build();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -30,8 +30,7 @@ public class KieContainerCreationService {
|
||||
private final RulesClient rulesClient;
|
||||
|
||||
|
||||
@Observed(name = "KieContainerCreationService",
|
||||
contextualName = "get-kie-container")
|
||||
@Observed(name = "KieContainerCreationService", contextualName = "get-kie-container")
|
||||
public KieWrapper getLatestKieContainer(String dossierTemplateId, RuleFileType ruleFileType) {
|
||||
|
||||
try {
|
||||
|
||||
@ -52,7 +52,12 @@ public class RuleFileParser {
|
||||
}
|
||||
}
|
||||
|
||||
String imports = ruleString.substring(0, packageDescr.getImports().stream().mapToInt(ImportDescr::getEndCharacter).max().orElseThrow() + 1);
|
||||
String imports = ruleString.substring(0,
|
||||
packageDescr.getImports()
|
||||
.stream()
|
||||
.mapToInt(ImportDescr::getEndCharacter)
|
||||
.max()
|
||||
.orElseThrow() + 1);
|
||||
String globals = packageDescr.getGlobals()
|
||||
.stream()
|
||||
.map(globalDescr -> ruleString.substring(globalDescr.getStartCharacter(), globalDescr.getEndCharacter()))
|
||||
@ -61,11 +66,20 @@ public class RuleFileParser {
|
||||
List<RuleClass> ruleClasses = buildRuleClasses(allRules);
|
||||
|
||||
return new RuleFileBluePrint(imports.trim(),
|
||||
packageDescr.getImports().stream().findFirst().map(ImportDescr::getLine).orElse(0),
|
||||
globals.trim(),
|
||||
packageDescr.getGlobals().stream().findFirst().map(GlobalDescr::getLine).orElse(0), allQueries,
|
||||
ruleClasses,
|
||||
customDroolsSyntaxValidation);
|
||||
packageDescr.getImports()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.map(ImportDescr::getLine)
|
||||
.orElse(0),
|
||||
globals.trim(),
|
||||
packageDescr.getGlobals()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.map(GlobalDescr::getLine)
|
||||
.orElse(0),
|
||||
allQueries,
|
||||
ruleClasses,
|
||||
customDroolsSyntaxValidation);
|
||||
}
|
||||
|
||||
|
||||
@ -91,11 +105,12 @@ public class RuleFileParser {
|
||||
Matcher matcher = ruleIdentifierInCodeFinder.matcher(code);
|
||||
while (matcher.find()) {
|
||||
String identifierInCode = code.substring(matcher.start(1), matcher.end(1));
|
||||
long line = code.substring(0, matcher.start(1)).lines().count() + lineOffset - 1;
|
||||
long line = code.substring(0, matcher.start(1)).lines()
|
||||
.count() + lineOffset - 1;
|
||||
if (!identifier.equals(identifierInCode)) {
|
||||
customDroolsSyntaxValidation.addErrorMessage((int) line,
|
||||
0,
|
||||
String.format("Rule identifier %s is not equal to rule identifier %s in rule name!", identifierInCode, identifier));
|
||||
0,
|
||||
String.format("Rule identifier %s is not equal to rule identifier %s in rule name!", identifierInCode, identifier));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -104,23 +119,35 @@ public class RuleFileParser {
|
||||
private void addDuplicateRuleIdentifierErrorMessage(RuleDescr rule, BasicRule basicRule, DroolsSyntaxValidation customDroolsSyntaxValidation) {
|
||||
|
||||
customDroolsSyntaxValidation.addErrorMessage(rule.getLine(),
|
||||
rule.getColumn(),
|
||||
String.format("RuleIdentifier: %s is a duplicate, duplicates are not allowed!", basicRule.getIdentifier()));
|
||||
rule.getColumn(),
|
||||
String.format("RuleIdentifier: %s is a duplicate, duplicates are not allowed!", basicRule.getIdentifier()));
|
||||
}
|
||||
|
||||
|
||||
private List<RuleClass> buildRuleClasses(List<BasicRule> allRules) {
|
||||
|
||||
List<RuleType> ruleTypeOrder = allRules.stream().map(BasicRule::getIdentifier).map(RuleIdentifier::type).distinct().toList();
|
||||
Map<RuleType, List<BasicRule>> rulesPerType = allRules.stream().collect(groupingBy(rule -> rule.getIdentifier().type()));
|
||||
return ruleTypeOrder.stream().map(type -> new RuleClass(type, groupingByGroup(rulesPerType.get(type)))).collect(Collectors.toList());
|
||||
List<RuleType> ruleTypeOrder = allRules.stream()
|
||||
.map(BasicRule::getIdentifier)
|
||||
.map(RuleIdentifier::type)
|
||||
.distinct()
|
||||
.toList();
|
||||
Map<RuleType, List<BasicRule>> rulesPerType = allRules.stream()
|
||||
.collect(groupingBy(rule -> rule.getIdentifier().type()));
|
||||
return ruleTypeOrder.stream()
|
||||
.map(type -> new RuleClass(type, groupingByGroup(rulesPerType.get(type))))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private List<RuleUnit> groupingByGroup(List<BasicRule> rules) {
|
||||
|
||||
Map<Integer, List<BasicRule>> rulesPerUnit = rules.stream().collect(groupingBy(rule -> rule.getIdentifier().unit()));
|
||||
return rulesPerUnit.keySet().stream().sorted().map(unit -> new RuleUnit(unit, rulesPerUnit.get(unit))).collect(Collectors.toList());
|
||||
Map<Integer, List<BasicRule>> rulesPerUnit = rules.stream()
|
||||
.collect(groupingBy(rule -> rule.getIdentifier().unit()));
|
||||
return rulesPerUnit.keySet()
|
||||
.stream()
|
||||
.sorted()
|
||||
.map(unit -> new RuleUnit(unit, rulesPerUnit.get(unit)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@ public class ObservedStorageService {
|
||||
|
||||
@Observed(name = "RedactionStorageService", contextualName = "get-document-data")
|
||||
public DocumentData getDocumentData(String dossierId, String fileId) {
|
||||
|
||||
return redactionStorageService.getDocumentData(dossierId, fileId);
|
||||
}
|
||||
|
||||
|
||||
@ -80,9 +80,12 @@ public class RedactionStorageService {
|
||||
|
||||
try {
|
||||
ImportedRedactionsPerPage importedRedactionsPerPage = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
|
||||
ImportedRedactionsPerPage.class);
|
||||
return new ImportedRedactions(importedRedactionsPerPage.getImportedRedactions().values().stream().flatMap(List::stream).collect(Collectors.toList()));
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
|
||||
ImportedRedactionsPerPage.class);
|
||||
return new ImportedRedactions(importedRedactionsPerPage.getImportedRedactions().values()
|
||||
.stream()
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList()));
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("Imported redactions not available.");
|
||||
return new ImportedRedactions();
|
||||
@ -90,14 +93,13 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Timed("redactmanager_getImportedRedactions")
|
||||
public ImportedRedactionsPerPage getImportedRedactionsPerPage(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
return storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
|
||||
ImportedRedactionsPerPage.class);
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_REDACTIONS),
|
||||
ImportedRedactionsPerPage.class);
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("Imported redactions not available.");
|
||||
return null;
|
||||
@ -111,12 +113,12 @@ public class RedactionStorageService {
|
||||
|
||||
try {
|
||||
RedactionLog redactionLog = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG),
|
||||
RedactionLog.class);
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG),
|
||||
RedactionLog.class);
|
||||
redactionLog.setRedactionLogEntry(redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
|
||||
.collect(Collectors.toList()));
|
||||
.stream()
|
||||
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
|
||||
.collect(Collectors.toList()));
|
||||
return redactionLog;
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("RedactionLog not available.");
|
||||
@ -132,9 +134,9 @@ public class RedactionStorageService {
|
||||
try {
|
||||
EntityLog entityLog = storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG), EntityLog.class);
|
||||
entityLog.setEntityLogEntry(entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
|
||||
.collect(Collectors.toList()));
|
||||
.stream()
|
||||
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
|
||||
.collect(Collectors.toList()));
|
||||
return entityLog;
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("EntityLog not available.");
|
||||
@ -156,17 +158,17 @@ public class RedactionStorageService {
|
||||
try {
|
||||
return DocumentData.builder()
|
||||
.documentStructure(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
|
||||
DocumentStructure.class))
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
|
||||
DocumentStructure.class))
|
||||
.documentTextData(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
|
||||
DocumentTextData[].class))
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
|
||||
DocumentTextData[].class))
|
||||
.documentPositionData(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION),
|
||||
DocumentPositionData[].class))
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION),
|
||||
DocumentPositionData[].class))
|
||||
.documentPages(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
|
||||
DocumentPage[].class))
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
|
||||
DocumentPage[].class))
|
||||
.build();
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("DocumentData not available.");
|
||||
|
||||
@ -11,6 +11,7 @@ public class RuleManagementResources {
|
||||
|
||||
private static final String folderPrefix = "drools";
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static InputStream getBaseRuleFileInputStream() {
|
||||
|
||||
@ -26,6 +27,7 @@ public class RuleManagementResources {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static InputStream getBaseComponentRuleFileInputStream() {
|
||||
|
||||
|
||||
@ -18,11 +18,11 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class DateConverter {
|
||||
|
||||
static List<SimpleDateFormat> formats = List.of(new SimpleDateFormat("dd MMM yy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
|
||||
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
|
||||
|
||||
|
||||
public Optional<Date> parseDate(String dateAsString) {
|
||||
|
||||
@ -21,7 +21,9 @@ public final class IdBuilder {
|
||||
|
||||
public String buildId(Set<Page> pages, List<Rectangle2D> rectanglesPerLine, String type, String entityType) {
|
||||
|
||||
return buildId(pages.stream().map(Page::getNumber).collect(Collectors.toList()), rectanglesPerLine, type, entityType);
|
||||
return buildId(pages.stream()
|
||||
.map(Page::getNumber)
|
||||
.collect(Collectors.toList()), rectanglesPerLine, type, entityType);
|
||||
}
|
||||
|
||||
|
||||
@ -29,7 +31,9 @@ public final class IdBuilder {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(type).append(entityType);
|
||||
List<Integer> sortedPageNumbers = pageNumbers.stream().sorted(Comparator.comparingInt(Integer::intValue)).toList();
|
||||
List<Integer> sortedPageNumbers = pageNumbers.stream()
|
||||
.sorted(Comparator.comparingInt(Integer::intValue))
|
||||
.toList();
|
||||
sortedPageNumbers.forEach(sb::append);
|
||||
rectanglesPerLine.forEach(rectangle2D -> sb.append(Math.round(rectangle2D.getX()))
|
||||
.append(Math.round(rectangle2D.getY()))
|
||||
|
||||
@ -22,19 +22,25 @@ public class RectangleTransformations {
|
||||
|
||||
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
|
||||
return atomicTextBlocks.stream()
|
||||
.flatMap(atomicTextBlock -> atomicTextBlock.getPositions()
|
||||
.stream())
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangleBBox(List<Position> positions) {
|
||||
|
||||
return positions.stream().map(Position::toRectangle2D).collect(new Rectangle2DBBoxCollector());
|
||||
return positions.stream()
|
||||
.map(Position::toRectangle2D)
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
|
||||
|
||||
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());
|
||||
return rectangle2DList.stream()
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -49,7 +55,9 @@ public class RectangleTransformations {
|
||||
if (rectangle2DList.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
double splitThreshold = rectangle2DList.stream().mapToDouble(RectangularShape::getWidth).average().orElse(5) * 5.0;
|
||||
double splitThreshold = rectangle2DList.stream()
|
||||
.mapToDouble(RectangularShape::getWidth).average()
|
||||
.orElse(5) * 5.0;
|
||||
|
||||
List<List<Rectangle2D>> rectangleListsWithGaps = new LinkedList<>();
|
||||
List<Rectangle2D> rectangleListWithoutGaps = new LinkedList<>();
|
||||
@ -66,7 +74,9 @@ public class RectangleTransformations {
|
||||
previousRectangle = currentRectangle;
|
||||
}
|
||||
}
|
||||
return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangle2DBBox).toList();
|
||||
return rectangleListsWithGaps.stream()
|
||||
.map(RectangleTransformations::rectangle2DBBox)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -96,9 +106,9 @@ public class RectangleTransformations {
|
||||
public BinaryOperator<BBox> combiner() {
|
||||
|
||||
return (b1, b2) -> new BBox(Math.min(b1.lowerLeftX, b2.lowerLeftX),
|
||||
Math.min(b1.lowerLeftY, b2.lowerLeftY),
|
||||
Math.max(b1.upperRightX, b2.upperRightX),
|
||||
Math.max(b1.upperRightY, b2.upperRightY));
|
||||
Math.min(b1.lowerLeftY, b2.lowerLeftY),
|
||||
Math.max(b1.upperRightX, b2.upperRightX),
|
||||
Math.max(b1.upperRightY, b2.upperRightY));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -89,7 +89,10 @@ public class RedactionSearchUtility {
|
||||
|
||||
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineTextRange).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList();
|
||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||
.map(textBlock::getLineTextRange)
|
||||
.filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
|
||||
.toList();
|
||||
if (lineBoundaries.isEmpty()) {
|
||||
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
|
||||
}
|
||||
@ -132,6 +135,7 @@ public class RedactionSearchUtility {
|
||||
return getTextRangesByPatternWithLineBreaks(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
public static List<TextRange> findTextRangesByRegexIgnoreCase(String regexPattern, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
|
||||
@ -185,19 +189,21 @@ public class RedactionSearchUtility {
|
||||
return getTextRangesByPattern(textBlock, 0, pattern);
|
||||
}
|
||||
|
||||
|
||||
public static List<TextRange> findTextRangesByList(List<String> searchList, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
for (var searchString: searchList) {
|
||||
for (var searchString : searchList) {
|
||||
boundaries.addAll(findTextRangesByString(searchString, textBlock));
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
|
||||
public static List<TextRange> findTextRangesByListIgnoreCase(List<String> searchList, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
for (var searchString: searchList) {
|
||||
for (var searchString : searchList) {
|
||||
boundaries.addAll(findTextRangesByStringIgnoreCase(searchString, textBlock));
|
||||
}
|
||||
return boundaries;
|
||||
|
||||
@ -20,7 +20,8 @@ public final class ResourceLoader {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath);
|
||||
}
|
||||
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
|
||||
return br.lines().collect(Collectors.toSet());
|
||||
return br.lines()
|
||||
.collect(Collectors.toSet());
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e);
|
||||
}
|
||||
|
||||
@ -54,16 +54,16 @@ public final class SeparatorUtils {
|
||||
private static boolean validateEnd(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return textRange.end() == textBlock.getTextRange().end() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
|
||||
}
|
||||
|
||||
|
||||
private static boolean validateStart(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return textRange.start() == textBlock.getTextRange().start() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start()));
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -172,7 +172,7 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(HINT_ONLY_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(MUST_REDACT_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
|
||||
false));
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(TEST_METHOD_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(PII_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PURITY_INDICATOR, false));
|
||||
@ -183,9 +183,9 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(FORMULA_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ROTATE_SIMPLE_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DOSSIER_REDACTIONS_INDICATOR,
|
||||
true));
|
||||
true));
|
||||
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
|
||||
true));
|
||||
true));
|
||||
|
||||
}
|
||||
|
||||
@ -193,45 +193,102 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
protected void loadDictionaryForTest() {
|
||||
|
||||
dictionary.computeIfAbsent(DICTIONARY_AUTHOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(DICTIONARY_SPONSOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(DICTIONARY_ADDRESS, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(HINT_ONLY_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/hint_only.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(MUST_REDACT_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PUBLISHED_INFORMATION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/published_information.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(TEST_METHOD_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/test_method.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/PII.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PURITY_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/purity.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(IMAGE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(OCR_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(LOGO_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SIGNATURE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(FORMULA_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
|
||||
|
||||
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
|
||||
}
|
||||
|
||||
@ -240,7 +297,10 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
dictionary.clear();
|
||||
dictionary.computeIfAbsent(ROTATE_SIMPLE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
|
||||
@ -373,8 +433,8 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
}
|
||||
|
||||
|
||||
@ -424,7 +484,9 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
if (entries == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return entries.stream().map(this::toDictionaryEntry).collect(Collectors.toList());
|
||||
return entries.stream()
|
||||
.map(this::toDictionaryEntry)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
@ -480,11 +542,11 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
|
||||
cvServiceResponseFileStream);
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
|
||||
cvServiceResponseFileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO),
|
||||
imageServiceResponseStream);
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO),
|
||||
imageServiceResponseStream);
|
||||
|
||||
return request;
|
||||
|
||||
|
||||
@ -104,15 +104,15 @@ public class AnalysisTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
|
||||
@ -82,18 +82,18 @@ public class DictionaryServiceTest {
|
||||
|
||||
when(dictionaryClient.getVersion(anyString())).thenReturn(0L);
|
||||
when(dictionaryClient.getColors(anyString())).thenReturn(new Colors("dossierTemplateId",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc"));
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc"));
|
||||
|
||||
var type1 = new Type();
|
||||
type1.setType("type1");
|
||||
@ -101,7 +101,9 @@ public class DictionaryServiceTest {
|
||||
type1.setVersion(1L);
|
||||
type1.setHexColor("#cccccc");
|
||||
type1.setHasDictionary(true);
|
||||
type1.setEntries(Stream.of("a", "b", "c").map(t -> new DictionaryEntry(1, t, 1L, false, "type1")).collect(Collectors.toList()));
|
||||
type1.setEntries(Stream.of("a", "b", "c")
|
||||
.map(t -> new DictionaryEntry(1, t, 1L, false, "type1"))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
var type2 = new Type();
|
||||
type2.setType("type2");
|
||||
@ -109,7 +111,9 @@ public class DictionaryServiceTest {
|
||||
type2.setVersion(1L);
|
||||
type2.setHexColor("#cccccc");
|
||||
type2.setHasDictionary(true);
|
||||
type2.setEntries(Stream.of("d", "e", "f").map(t -> new DictionaryEntry(1, t, 1L, false, "type2")).collect(Collectors.toList()));
|
||||
type2.setEntries(Stream.of("d", "e", "f")
|
||||
.map(t -> new DictionaryEntry(1, t, 1L, false, "type2"))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
var type1Updated = new Type();
|
||||
type1Updated.setType("type1");
|
||||
@ -117,7 +121,9 @@ public class DictionaryServiceTest {
|
||||
type1Updated.setVersion(2L);
|
||||
type1Updated.setHexColor("#cccccc");
|
||||
type1Updated.setHasDictionary(true);
|
||||
type1Updated.setEntries(Stream.of("z", "q", "x").map(t -> new DictionaryEntry(1, t, 2L, false, "type1")).collect(Collectors.toList()));
|
||||
type1Updated.setEntries(Stream.of("z", "q", "x")
|
||||
.map(t -> new DictionaryEntry(1, t, 2L, false, "type1"))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
when(dictionaryClient.getAllTypesForDossierTemplate(anyString(), anyBoolean())).thenReturn(List.of(type1, type2));
|
||||
when(dictionaryClient.getDictionaryForType("type1", null)).thenReturn(type1);
|
||||
@ -174,18 +180,18 @@ public class DictionaryServiceTest {
|
||||
|
||||
when(dictionaryClient.getVersion(anyString())).thenReturn(0L);
|
||||
when(dictionaryClient.getColors(anyString())).thenReturn(new Colors("dtId",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc"));
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc",
|
||||
"#cccccc"));
|
||||
|
||||
var type = "type";
|
||||
var dtType = new Type();
|
||||
@ -195,7 +201,9 @@ public class DictionaryServiceTest {
|
||||
dtType.setHexColor("#cccccc");
|
||||
dtType.setHasDictionary(true);
|
||||
dtType.setDossierTemplateId("dtId");
|
||||
List<DictionaryEntry> dossierTemplateEntries = Stream.of("aa", "bb").map(t -> new DictionaryEntry(1, t, 1L, false, "type1")).collect(Collectors.toList());
|
||||
List<DictionaryEntry> dossierTemplateEntries = Stream.of("aa", "bb")
|
||||
.map(t -> new DictionaryEntry(1, t, 1L, false, "type1"))
|
||||
.collect(Collectors.toList());
|
||||
dossierTemplateEntries.add(new DictionaryEntry(1, "cc", 2L, false, "type1"));
|
||||
assertThat(dossierTemplateEntries.size()).isEqualTo(3);
|
||||
dtType.setEntries(dossierTemplateEntries);
|
||||
@ -208,7 +216,9 @@ public class DictionaryServiceTest {
|
||||
dossierType.setHasDictionary(true);
|
||||
dossierType.setDossierTemplateId("dtId");
|
||||
dossierType.setDossierId("dossierId");
|
||||
List<DictionaryEntry> dossierEntries = Stream.of("aa", "bb").map(t -> new DictionaryEntry(1, t, 2L, true, "dossierType")).collect(Collectors.toList());
|
||||
List<DictionaryEntry> dossierEntries = Stream.of("aa", "bb")
|
||||
.map(t -> new DictionaryEntry(1, t, 2L, true, "dossierType"))
|
||||
.collect(Collectors.toList());
|
||||
dossierEntries.add(new DictionaryEntry(1, "dd", 1L, false, "dossierType"));
|
||||
assertThat(dossierEntries.size()).isEqualTo(3);
|
||||
dossierType.setEntries(dossierEntries);
|
||||
@ -224,15 +234,17 @@ public class DictionaryServiceTest {
|
||||
dictionaryService.updateDictionary("dtId", "dossierId");
|
||||
var dict = dictionaryService.getDeepCopyDictionary("dtId", "dossierId");
|
||||
assertThat(dict.getDictionaryModels().size()).isEqualTo(1);
|
||||
var dictModel = dict.getDictionaryModels().get(0);
|
||||
var dictModel = dict.getDictionaryModels()
|
||||
.get(0);
|
||||
assertThat(dictModel.getType()).isEqualTo(type);
|
||||
assertThat(dictModel.getEntries().size()).isEqualTo(4);
|
||||
dictModel.getEntries().forEach(entry -> {
|
||||
switch (entry.getValue()) {
|
||||
case "aa", "dd", "bb" -> assertThat(entry.getTypeId()).isEqualTo(dossierType.getTypeId());
|
||||
case "cc" -> assertThat(entry.getTypeId()).isEqualTo(dtType.getTypeId());
|
||||
}
|
||||
});
|
||||
dictModel.getEntries()
|
||||
.forEach(entry -> {
|
||||
switch (entry.getValue()) {
|
||||
case "aa", "dd", "bb" -> assertThat(entry.getTypeId()).isEqualTo(dossierType.getTypeId());
|
||||
case "cc" -> assertThat(entry.getTypeId()).isEqualTo(dtType.getTypeId());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -8,6 +8,7 @@ import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@ -55,9 +56,11 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf");
|
||||
|
||||
ClassPathResource importedRedactionClasspathResource = new ClassPathResource("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactionClasspathResource.getInputStream());
|
||||
|
||||
ClassPathResource importedRedactionClasspathResource = new ClassPathResource(
|
||||
"files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
|
||||
importedRedactionClasspathResource.getInputStream());
|
||||
|
||||
// AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).pdf",
|
||||
// "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).TABLES.json");
|
||||
@ -86,7 +89,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
// FIXME TableNodeFactory: 36, why has table no rows/cols here.
|
||||
AnalyzeRequest request = prepareStorage("files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.pdf",
|
||||
"files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json");
|
||||
"files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json");
|
||||
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
@ -166,15 +169,15 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
|
||||
@ -87,15 +87,15 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
@ -122,6 +122,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
assertThat(recommendations).containsExactlyInAnyOrder("Michael N.", "Funnarie B.", "Feuer A.");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void acceptanceTests() throws IOException {
|
||||
|
||||
@ -133,8 +134,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
System.out.println("Finished analysis");
|
||||
EntityLog entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow();
|
||||
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst().orElseThrow();
|
||||
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
||||
.orElseThrow();
|
||||
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
||||
|
||||
@ -146,8 +149,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow();
|
||||
var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst().orElseThrow();
|
||||
var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
||||
.orElseThrow();
|
||||
var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
assertEquals(EntryState.APPLIED, asyaLyon2.getState());
|
||||
|
||||
@ -168,13 +173,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals(type))
|
||||
.filter(entry -> entry.getValue().equals(value))
|
||||
.filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0)));
|
||||
.filter(entry -> entry.getContainingNodeId()
|
||||
.get(0).equals(sectionNumber.get(0)));
|
||||
}
|
||||
|
||||
|
||||
private static Stream<EntityLogEntry> findEntityByTypeAndValue(EntityLog redactionLog, String type, String value) {
|
||||
|
||||
return redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getType().equals(type)).filter(entry -> entry.getValue().equals(value));
|
||||
return redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals(type))
|
||||
.filter(entry -> entry.getValue().equals(value));
|
||||
}
|
||||
|
||||
|
||||
@ -201,7 +210,9 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
var redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertEquals(EntryState.IGNORED,
|
||||
findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY)).findFirst().get().getState());
|
||||
findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY))
|
||||
.findFirst()
|
||||
.get().getState());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -78,15 +78,15 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
@ -128,7 +128,8 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
|
||||
|
||||
assertThat(entityLog.getEntityLogEntry().size()).isEqualTo(1);
|
||||
|
||||
EntityLogEntry redactionLogEntry = entityLog.getEntityLogEntry().get(0);
|
||||
EntityLogEntry redactionLogEntry = entityLog.getEntityLogEntry()
|
||||
.get(0);
|
||||
|
||||
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
|
||||
assertThat(redactionLogEntry.getValue()).isEqualTo(entryAuthorAndPIIDictionary);
|
||||
@ -164,7 +165,8 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
|
||||
|
||||
assertThat(redactionLog.getEntityLogEntry().size()).isEqualTo(1);
|
||||
|
||||
EntityLogEntry redactionLogEntry = redactionLog.getEntityLogEntry().get(0);
|
||||
EntityLogEntry redactionLogEntry = redactionLog.getEntityLogEntry()
|
||||
.get(0);
|
||||
|
||||
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
|
||||
assertThat(redactionLogEntry.getValue()).isEqualTo(entryAuthorDictionary);
|
||||
|
||||
@ -136,99 +136,99 @@ public class RulesTest {
|
||||
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
|
||||
private final Map<String, List<String>> falsePositive = new HashMap<>();
|
||||
private static final Map<String, String> typeColorMap = Map.ofEntries(entry(VERTEBRATE, "#ff85f7"),
|
||||
entry(ADDRESS, "#ffe187"),
|
||||
entry(AUTHOR, "#ffe187"),
|
||||
entry(SPONSOR, "#85ebff"),
|
||||
entry(NO_REDACTION_INDICATOR, "#be85ff"),
|
||||
entry(REDACTION_INDICATOR, "#caff85"),
|
||||
entry(HINT_ONLY, "#abc0c4"),
|
||||
entry(MUST_REDACT, "#fab4c0"),
|
||||
entry(PUBLISHED_INFORMATION, "#85ebff"),
|
||||
entry(TEST_METHOD, "#91fae8"),
|
||||
entry(PII, "#66ccff"),
|
||||
entry(PURITY, "#ffe187"),
|
||||
entry(IMAGE, "#fcc5fb"),
|
||||
entry(OCR, "#fcc5fb"),
|
||||
entry(LOGO, "#ffe187"),
|
||||
entry(FORMULA, "#ffe187"),
|
||||
entry(SIGNATURE, "#ffe187"),
|
||||
entry(IMPORTED_REDACTION, "#fcfbe6"));
|
||||
entry(ADDRESS, "#ffe187"),
|
||||
entry(AUTHOR, "#ffe187"),
|
||||
entry(SPONSOR, "#85ebff"),
|
||||
entry(NO_REDACTION_INDICATOR, "#be85ff"),
|
||||
entry(REDACTION_INDICATOR, "#caff85"),
|
||||
entry(HINT_ONLY, "#abc0c4"),
|
||||
entry(MUST_REDACT, "#fab4c0"),
|
||||
entry(PUBLISHED_INFORMATION, "#85ebff"),
|
||||
entry(TEST_METHOD, "#91fae8"),
|
||||
entry(PII, "#66ccff"),
|
||||
entry(PURITY, "#ffe187"),
|
||||
entry(IMAGE, "#fcc5fb"),
|
||||
entry(OCR, "#fcc5fb"),
|
||||
entry(LOGO, "#ffe187"),
|
||||
entry(FORMULA, "#ffe187"),
|
||||
entry(SIGNATURE, "#ffe187"),
|
||||
entry(IMPORTED_REDACTION, "#fcfbe6"));
|
||||
private static final Map<String, Boolean> hintTypeMap = Map.ofEntries(entry(VERTEBRATE, true),
|
||||
entry(ADDRESS, false),
|
||||
entry(AUTHOR, false),
|
||||
entry(SPONSOR, false),
|
||||
entry(NO_REDACTION_INDICATOR, true),
|
||||
entry(REDACTION_INDICATOR, true),
|
||||
entry(HINT_ONLY, true),
|
||||
entry(MUST_REDACT, true),
|
||||
entry(PUBLISHED_INFORMATION, true),
|
||||
entry(TEST_METHOD, true),
|
||||
entry(PII, false),
|
||||
entry(PURITY, false),
|
||||
entry(IMAGE, true),
|
||||
entry(OCR, true),
|
||||
entry(FORMULA, false),
|
||||
entry(LOGO, false),
|
||||
entry(SIGNATURE, false),
|
||||
entry(DOSSIER_REDACTIONS, false),
|
||||
entry(IMPORTED_REDACTION, false));
|
||||
entry(ADDRESS, false),
|
||||
entry(AUTHOR, false),
|
||||
entry(SPONSOR, false),
|
||||
entry(NO_REDACTION_INDICATOR, true),
|
||||
entry(REDACTION_INDICATOR, true),
|
||||
entry(HINT_ONLY, true),
|
||||
entry(MUST_REDACT, true),
|
||||
entry(PUBLISHED_INFORMATION, true),
|
||||
entry(TEST_METHOD, true),
|
||||
entry(PII, false),
|
||||
entry(PURITY, false),
|
||||
entry(IMAGE, true),
|
||||
entry(OCR, true),
|
||||
entry(FORMULA, false),
|
||||
entry(LOGO, false),
|
||||
entry(SIGNATURE, false),
|
||||
entry(DOSSIER_REDACTIONS, false),
|
||||
entry(IMPORTED_REDACTION, false));
|
||||
private static final Map<String, Boolean> caseInSensitiveMap = Map.ofEntries(entry(VERTEBRATE, true),
|
||||
entry(ADDRESS, false),
|
||||
entry(AUTHOR, false),
|
||||
entry(SPONSOR, false),
|
||||
entry(NO_REDACTION_INDICATOR, true),
|
||||
entry(REDACTION_INDICATOR, true),
|
||||
entry(HINT_ONLY, true),
|
||||
entry(MUST_REDACT, true),
|
||||
entry(PUBLISHED_INFORMATION, true),
|
||||
entry(TEST_METHOD, false),
|
||||
entry(PII, false),
|
||||
entry(PURITY, false),
|
||||
entry(IMAGE, true),
|
||||
entry(OCR, true),
|
||||
entry(SIGNATURE, true),
|
||||
entry(LOGO, true),
|
||||
entry(FORMULA, true),
|
||||
entry(DOSSIER_REDACTIONS, false),
|
||||
entry(IMPORTED_REDACTION, false));
|
||||
entry(ADDRESS, false),
|
||||
entry(AUTHOR, false),
|
||||
entry(SPONSOR, false),
|
||||
entry(NO_REDACTION_INDICATOR, true),
|
||||
entry(REDACTION_INDICATOR, true),
|
||||
entry(HINT_ONLY, true),
|
||||
entry(MUST_REDACT, true),
|
||||
entry(PUBLISHED_INFORMATION, true),
|
||||
entry(TEST_METHOD, false),
|
||||
entry(PII, false),
|
||||
entry(PURITY, false),
|
||||
entry(IMAGE, true),
|
||||
entry(OCR, true),
|
||||
entry(SIGNATURE, true),
|
||||
entry(LOGO, true),
|
||||
entry(FORMULA, true),
|
||||
entry(DOSSIER_REDACTIONS, false),
|
||||
entry(IMPORTED_REDACTION, false));
|
||||
private static final Map<String, Boolean> recommendationTypeMap = Map.ofEntries(entry(VERTEBRATE, false),
|
||||
entry(ADDRESS, false),
|
||||
entry(AUTHOR, false),
|
||||
entry(SPONSOR, false),
|
||||
entry(NO_REDACTION_INDICATOR, false),
|
||||
entry(REDACTION_INDICATOR, false),
|
||||
entry(HINT_ONLY, false),
|
||||
entry(MUST_REDACT, false),
|
||||
entry(PUBLISHED_INFORMATION, false),
|
||||
entry(TEST_METHOD, false),
|
||||
entry(PII, false),
|
||||
entry(PURITY, false),
|
||||
entry(IMAGE, false),
|
||||
entry(OCR, false),
|
||||
entry(FORMULA, false),
|
||||
entry(SIGNATURE, false),
|
||||
entry(LOGO, false),
|
||||
entry(DOSSIER_REDACTIONS, false),
|
||||
entry(IMPORTED_REDACTION, false));
|
||||
entry(ADDRESS, false),
|
||||
entry(AUTHOR, false),
|
||||
entry(SPONSOR, false),
|
||||
entry(NO_REDACTION_INDICATOR, false),
|
||||
entry(REDACTION_INDICATOR, false),
|
||||
entry(HINT_ONLY, false),
|
||||
entry(MUST_REDACT, false),
|
||||
entry(PUBLISHED_INFORMATION, false),
|
||||
entry(TEST_METHOD, false),
|
||||
entry(PII, false),
|
||||
entry(PURITY, false),
|
||||
entry(IMAGE, false),
|
||||
entry(OCR, false),
|
||||
entry(FORMULA, false),
|
||||
entry(SIGNATURE, false),
|
||||
entry(LOGO, false),
|
||||
entry(DOSSIER_REDACTIONS, false),
|
||||
entry(IMPORTED_REDACTION, false));
|
||||
private static final Map<String, Integer> rankTypeMap = Map.ofEntries(entry(PURITY, 155),
|
||||
entry(PII, 150),
|
||||
entry(ADDRESS, 140),
|
||||
entry(AUTHOR, 130),
|
||||
entry(SPONSOR, 120),
|
||||
entry(VERTEBRATE, 110),
|
||||
entry(MUST_REDACT, 100),
|
||||
entry(REDACTION_INDICATOR, 90),
|
||||
entry(NO_REDACTION_INDICATOR, 80),
|
||||
entry(PUBLISHED_INFORMATION, 70),
|
||||
entry(TEST_METHOD, 60),
|
||||
entry(HINT_ONLY, 50),
|
||||
entry(IMAGE, 30),
|
||||
entry(OCR, 29),
|
||||
entry(LOGO, 28),
|
||||
entry(SIGNATURE, 27),
|
||||
entry(FORMULA, 26),
|
||||
entry(DOSSIER_REDACTIONS, 200),
|
||||
entry(IMPORTED_REDACTION, 200));
|
||||
entry(PII, 150),
|
||||
entry(ADDRESS, 140),
|
||||
entry(AUTHOR, 130),
|
||||
entry(SPONSOR, 120),
|
||||
entry(VERTEBRATE, 110),
|
||||
entry(MUST_REDACT, 100),
|
||||
entry(REDACTION_INDICATOR, 90),
|
||||
entry(NO_REDACTION_INDICATOR, 80),
|
||||
entry(PUBLISHED_INFORMATION, 70),
|
||||
entry(TEST_METHOD, 60),
|
||||
entry(HINT_ONLY, 50),
|
||||
entry(IMAGE, 30),
|
||||
entry(OCR, 29),
|
||||
entry(LOGO, 28),
|
||||
entry(SIGNATURE, 27),
|
||||
entry(FORMULA, 26),
|
||||
entry(DOSSIER_REDACTIONS, 200),
|
||||
entry(IMPORTED_REDACTION, 200));
|
||||
private final Colors colors = new Colors();
|
||||
|
||||
@Autowired
|
||||
@ -273,15 +273,15 @@ public class RulesTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
mockDictionaryCalls(0L);
|
||||
@ -372,10 +372,10 @@ public class RulesTest {
|
||||
log.warn("WARNING: {} files from {} failed", failedFiles.size(), fileSize);
|
||||
for (String fileName : failedFiles.keySet()) {
|
||||
log.warn(" - '{}' failed with Error: {} See line {} in {}",
|
||||
fileName,
|
||||
failedFiles.get(fileName),
|
||||
failedFiles.get(fileName).getStackTrace()[0].getLineNumber(),
|
||||
failedFiles.get(fileName).getStackTrace()[0].getClassName());
|
||||
fileName,
|
||||
failedFiles.get(fileName),
|
||||
failedFiles.get(fileName).getStackTrace()[0].getLineNumber(),
|
||||
failedFiles.get(fileName).getStackTrace()[0].getClassName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -442,9 +442,13 @@ public class RulesTest {
|
||||
assertThat(entityLog.getLegalBasisVersion()).isEqualTo(savedRedactionLog.getLegalBasisVersion());
|
||||
|
||||
assertThat(entityLog.getEntityLogEntry()
|
||||
.stream().filter(r -> !r.getEntryType().equals(EntryType.FALSE_POSITIVE)).filter(r -> !r.getEntryType().equals(EntryType.FALSE_RECOMMENDATION))
|
||||
.collect(Collectors.toSet())
|
||||
.size()).isEqualTo(savedRedactionLog.getRedactionLogEntry().stream().filter(r -> !r.isFalsePositive()).collect(Collectors.toSet()).size());
|
||||
.stream()
|
||||
.filter(r -> !r.getEntryType().equals(EntryType.FALSE_POSITIVE))
|
||||
.filter(r -> !r.getEntryType().equals(EntryType.FALSE_RECOMMENDATION))
|
||||
.collect(Collectors.toSet()).size()).isEqualTo(savedRedactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(r -> !r.isFalsePositive())
|
||||
.collect(Collectors.toSet()).size());
|
||||
assertThat(entityLog.getLegalBasis().size()).isEqualTo(savedRedactionLog.getLegalBasis().size());
|
||||
|
||||
for (EntityLogLegalBasis redactionLegalBasis : entityLog.getLegalBasis()) {
|
||||
@ -458,7 +462,10 @@ public class RulesTest {
|
||||
}
|
||||
|
||||
for (EntityLogEntry redactionLogEntry : entityLog.getEntityLogEntry()) {
|
||||
var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry().stream().filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId())).findFirst();
|
||||
var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId()))
|
||||
.findFirst();
|
||||
assertThat(savedRedactionLogEntry).isPresent();
|
||||
assertThat(savedRedactionLogEntry.get().getId()).isEqualTo(redactionLogEntry.getId());
|
||||
assertThat(savedRedactionLogEntry.get().getType()).isEqualTo(redactionLogEntry.getType());
|
||||
@ -473,7 +480,8 @@ public class RulesTest {
|
||||
assertThat(savedRedactionLogEntry.get().isFalsePositive()).isEqualTo(redactionLogEntry.getEntryType().equals(EntryType.FALSE_POSITIVE));
|
||||
assertThat(savedRedactionLogEntry.get().getSection()).isEqualTo(redactionLogEntry.getSection());
|
||||
assertThat(savedRedactionLogEntry.get().getColor()).isEqualTo(redactionLogEntry.getColor());
|
||||
assertThat(savedRedactionLogEntry.get().getSectionNumber()).isEqualTo(redactionLogEntry.getContainingNodeId().get(0));
|
||||
assertThat(savedRedactionLogEntry.get().getSectionNumber()).isEqualTo(redactionLogEntry.getContainingNodeId()
|
||||
.get(0));
|
||||
assertThat(savedRedactionLogEntry.get().getTextBefore()).isEqualTo(redactionLogEntry.getTextBefore());
|
||||
assertThat(savedRedactionLogEntry.get().getTextAfter()).isEqualTo(redactionLogEntry.getTextAfter());
|
||||
assertThat(savedRedactionLogEntry.get().getStartOffset()).isEqualTo(redactionLogEntry.getStartOffset());
|
||||
@ -485,8 +493,7 @@ public class RulesTest {
|
||||
assertThat(savedRedactionLogEntry.get().isExcluded()).isEqualTo(redactionLogEntry.isExcluded());
|
||||
|
||||
for (Position position : redactionLogEntry.getPositions()) {
|
||||
var savedRectangle = savedRedactionLogEntry.get()
|
||||
.getPositions()
|
||||
var savedRectangle = savedRedactionLogEntry.get().getPositions()
|
||||
.stream()
|
||||
.filter(r -> r.getPage() == position.getPageNumber())
|
||||
.filter(r -> r.getTopLeft().getX() == position.x())
|
||||
@ -498,17 +505,16 @@ public class RulesTest {
|
||||
}
|
||||
|
||||
for (Change change : redactionLogEntry.getChanges()) {
|
||||
var savedChange = savedRedactionLogEntry.get()
|
||||
.getChanges()
|
||||
var savedChange = savedRedactionLogEntry.get().getChanges()
|
||||
.stream()
|
||||
.filter(c -> c.getAnalysisNumber() == change.getAnalysisNumber()).filter(c -> c.getType().name().equals(change.getType().name()))
|
||||
.filter(c -> c.getAnalysisNumber() == change.getAnalysisNumber())
|
||||
.filter(c -> c.getType().name().equals(change.getType().name()))
|
||||
.findFirst();
|
||||
assertThat(savedChange).isPresent();
|
||||
}
|
||||
|
||||
for (ManualChange manualChange : redactionLogEntry.getManualChanges()) {
|
||||
var savedManualChange = savedRedactionLogEntry.get()
|
||||
.getManualChanges()
|
||||
var savedManualChange = savedRedactionLogEntry.get().getManualChanges()
|
||||
.stream()
|
||||
.filter(m -> m.getManualRedactionType().name().equals(manualChange.getManualRedactionType().name()))
|
||||
.filter(m -> m.getUserId().equalsIgnoreCase(manualChange.getUserId()))
|
||||
@ -517,7 +523,8 @@ public class RulesTest {
|
||||
assertThat(savedManualChange).isPresent();
|
||||
}
|
||||
|
||||
assertThat(savedRedactionLogEntry.get().getEngines()).containsExactlyInAnyOrder(redactionLogEntry.getEngines().toArray(Engine[]::new));
|
||||
assertThat(savedRedactionLogEntry.get().getEngines()).containsExactlyInAnyOrder(redactionLogEntry.getEngines()
|
||||
.toArray(Engine[]::new));
|
||||
|
||||
assertThat(savedRedactionLogEntry.get().getReference()).containsAll(redactionLogEntry.getReference());
|
||||
assertThat(savedRedactionLogEntry.get().getImportedRedactionIntersections()).containsAll(redactionLogEntry.getImportedRedactionIntersections());
|
||||
@ -622,8 +629,8 @@ public class RulesTest {
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
}
|
||||
|
||||
|
||||
@ -647,8 +654,8 @@ public class RulesTest {
|
||||
.build();
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
|
||||
new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
|
||||
new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
|
||||
|
||||
return request;
|
||||
@ -659,45 +666,102 @@ public class RulesTest {
|
||||
private void loadDictionaryForTest() {
|
||||
|
||||
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/hint_only.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/published_information.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/test_method.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/PII.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/purity.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
|
||||
|
||||
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
|
||||
}
|
||||
|
||||
@ -736,12 +800,12 @@ public class RulesTest {
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR,
|
||||
false));
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION,
|
||||
false));
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
|
||||
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false));
|
||||
@ -809,7 +873,10 @@ public class RulesTest {
|
||||
protected LayoutParsingFinishedEvent analyzeDocumentStructure(LayoutParsingType layoutParsingType) {
|
||||
|
||||
return layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(LayoutParsingRequestProvider.build(layoutParsingType,
|
||||
AnalyzeRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()));
|
||||
AnalyzeRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.annotate;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||
|
||||
@ -79,8 +79,7 @@ public class AnnotationService {
|
||||
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
pdDocument.save(byteArrayOutputStream);
|
||||
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray())
|
||||
.build();
|
||||
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray()).build();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -13,10 +13,7 @@ public class TextEntityTest {
|
||||
@Test
|
||||
public void testMatchedRule() {
|
||||
|
||||
PrecursorEntity entity = PrecursorEntity.builder()
|
||||
.type("PII")
|
||||
.entityType(EntityType.ENTITY)
|
||||
.build();
|
||||
PrecursorEntity entity = PrecursorEntity.builder().type("PII").entityType(EntityType.ENTITY).build();
|
||||
entity.skip("CBI.1.0", "");
|
||||
entity.skip("CBI.2.0", "");
|
||||
entity.skip("CBI.3.0", "");
|
||||
@ -27,14 +24,10 @@ public class TextEntityTest {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testMatchedRuleWithNonsense() {
|
||||
|
||||
PrecursorEntity entity = PrecursorEntity.builder()
|
||||
.type("PII")
|
||||
.entityType(EntityType.ENTITY)
|
||||
.build();
|
||||
PrecursorEntity entity = PrecursorEntity.builder().type("PII").entityType(EntityType.ENTITY).build();
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
entity.skip("", "");
|
||||
});
|
||||
|
||||
@ -49,19 +49,26 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void assertEntitiesAreDuplicatedWithTheirTableCell() {
|
||||
|
||||
Document document = buildGraph("files/Minimal Examples/Meto1_Page22.pdf");
|
||||
List<TextEntity> entities = entityCreationService.byString("Surface Water", "test", EntityType.ENTITY, document).toList();
|
||||
List<TextEntity> entities = entityCreationService.byString("Surface Water", "test", EntityType.ENTITY, document)
|
||||
.toList();
|
||||
assertEquals(3, entities.size());
|
||||
assertEquals(1, entities.stream().distinct().count());
|
||||
assertEquals(1,
|
||||
entities.stream()
|
||||
.distinct()
|
||||
.count());
|
||||
assertEquals(2, entities.get(0).getDuplicateTextRanges().size());
|
||||
|
||||
var node = entities.get(0).getDeepestFullyContainingNode();
|
||||
|
||||
assertTrue(node.getTextRange().contains(entities.get(0).getTextRange()));
|
||||
assertTrue(entities.get(0).getDuplicateTextRanges().stream().allMatch(tr -> node.getTextRange().contains(tr)));
|
||||
assertTrue(entities.get(0).getDuplicateTextRanges()
|
||||
.stream()
|
||||
.allMatch(tr -> node.getTextRange().contains(tr)));
|
||||
}
|
||||
|
||||
|
||||
@ -84,8 +91,10 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
|
||||
Document document = buildGraph("files/new/crafted document.pdf");
|
||||
String type = "CBI_author";
|
||||
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document)
|
||||
.isPresent());
|
||||
assertTrue(entityCreationService.byTextRange(new TextRange(0, 10), type, EntityType.ENTITY, document)
|
||||
.isPresent());
|
||||
assertEquals(1, document.getEntities().size());
|
||||
verify(kieSession, times(1)).insert(any(TextEntity.class));
|
||||
}
|
||||
@ -114,7 +123,7 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
assertEquals("’s Donut ←", textEntity.getTextAfter());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals("Rule 5: Do not redact genitive CBI_authors (Entries based on Dict) ",
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
|
||||
@ -181,11 +190,18 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
.allEntriesInOrder()//
|
||||
.filter(entry -> entry.getType().equals(NodeType.TABLE))//
|
||||
.map(DocumentTree.Entry::getNode)//
|
||||
.findFirst().orElseThrow();
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
assertEquals(5, table.getNumberOfCols());
|
||||
assertEquals(4, table.getNumberOfRows());
|
||||
assertEquals(5, table.streamHeaders().toList().size());
|
||||
CharSequence firstHeader = table.streamHeadersForCell(1, 1).map(TableCell::getTextBlock).map(TextBlock::getSearchText).findFirst().orElseThrow();
|
||||
assertEquals(5,
|
||||
table.streamHeaders()
|
||||
.toList().size());
|
||||
CharSequence firstHeader = table.streamHeadersForCell(1, 1)
|
||||
.map(TableCell::getTextBlock)
|
||||
.map(TextBlock::getSearchText)
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
assertEquals("Author(s)", firstHeader.toString().stripTrailing());
|
||||
}
|
||||
|
||||
@ -207,17 +223,23 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
public void assertTableStructureMetolachlor() {
|
||||
|
||||
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
Table table = (Table) document.getDocumentTree()
|
||||
.allEntriesInOrder()
|
||||
.filter(entry -> entry.getNode().getPages().stream().anyMatch(page -> page.getNumber() == 22))
|
||||
Table table = (Table) document.getDocumentTree().allEntriesInOrder()
|
||||
.filter(entry -> entry.getNode().getPages()
|
||||
.stream()
|
||||
.anyMatch(page -> page.getNumber() == 22))
|
||||
.filter(entry -> entry.getType().equals(NodeType.TABLE))
|
||||
.map(DocumentTree.Entry::getNode)
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
assertEquals(5, table.getNumberOfCols());
|
||||
assertEquals(14, table.getNumberOfRows());
|
||||
assertEquals(10, table.streamHeaders().toList().size());
|
||||
List<String> twoHeaders = table.streamHeadersForCell(2, 1).map(TableCell::getTextBlock).map(TextBlock::getSearchText).toList();
|
||||
assertEquals(10,
|
||||
table.streamHeaders()
|
||||
.toList().size());
|
||||
List<String> twoHeaders = table.streamHeadersForCell(2, 1)
|
||||
.map(TableCell::getTextBlock)
|
||||
.map(TextBlock::getSearchText)
|
||||
.toList();
|
||||
assertEquals(2, twoHeaders.size());
|
||||
assertEquals("Component of residue definition: S-Metolachlor", twoHeaders.get(0).stripTrailing());
|
||||
assertEquals("Method type", twoHeaders.get(1).stripTrailing());
|
||||
@ -233,12 +255,13 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
|
||||
assertEquals("except Cranberry; Vegetable, ", textEntity.getTextBefore());
|
||||
assertEquals(", Group 9;", textEntity.getTextAfter());
|
||||
assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ",
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
|
||||
assertTrue(textEntity.getPages()
|
||||
.stream()
|
||||
.allMatch(pageNode -> pageNode.getNumber() == 10));
|
||||
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
@ -262,11 +285,13 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
assertEquals("2.6.1 Summary of ", textEntity.getTextBefore());
|
||||
assertEquals(" and excretion in", textEntity.getTextAfter());
|
||||
assertEquals("2.6.1 Summary of absorption, distribution, metabolism and excretion in mammals ",
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(4, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
|
||||
assertTrue(textEntity.getPages()
|
||||
.stream()
|
||||
.allMatch(pageNode -> pageNode.getNumber() == 33));
|
||||
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
@ -285,8 +310,10 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(4, textEntity.getIntersectingNodes().size());
|
||||
assertEquals("Table 2.7-1: List of substances and metabolites and related structural formula ",
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54));
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertTrue(textEntity.getPages()
|
||||
.stream()
|
||||
.allMatch(pageNode -> pageNode.getNumber() == 54));
|
||||
assertEquals(26, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
|
||||
assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode());
|
||||
@ -298,7 +325,8 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
// this might fail, if an entity with the same name exists twice in the deepest containing node
|
||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, TextEntity textEntity) {
|
||||
|
||||
List<Integer> paragraphStart = textEntity.getIntersectingNodes().stream()//
|
||||
List<Integer> paragraphStart = textEntity.getIntersectingNodes()
|
||||
.stream()//
|
||||
.map(SemanticNode::getTextBlock)//
|
||||
.map(textBlock -> textBlock.indexOf(searchTerm, textEntity.getDeepestFullyContainingNode().getTextRange().start()))//
|
||||
.toList();
|
||||
@ -316,14 +344,23 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
TextRange textRange = new TextRange(start, start + searchTerm.length());
|
||||
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY, document);
|
||||
entityCreationService.addEntityToGraph(textEntity, document);
|
||||
Page pageNode = document.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
|
||||
Page pageNode = document.getPages()
|
||||
.stream()
|
||||
.filter(page -> page.getNumber() == pageNumber)
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
assertEquals(textEntity.getValue(), searchTerm);
|
||||
assertTrue(pageNode.getEntities().contains(textEntity));
|
||||
assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(textEntity)));
|
||||
assertTrue(document.getPages()
|
||||
.stream()
|
||||
.filter(page -> page != pageNode)
|
||||
.noneMatch(page -> page.getEntities().contains(textEntity)));
|
||||
assertTrue(textEntity.getPages().contains(pageNode));
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
assertTrue(textEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(textEntity)));
|
||||
assertTrue(textEntity.getIntersectingNodes()
|
||||
.stream()
|
||||
.allMatch(node -> node.getEntities().contains(textEntity)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -112,15 +112,15 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
@ -155,11 +155,11 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
|
||||
long droolsStart = System.currentTimeMillis();
|
||||
List<FileAttribute> fileAttributes = entityDroolsExecutionService.executeRules(kieContainer,
|
||||
document,
|
||||
dictionary,
|
||||
Collections.emptyList(),
|
||||
new ManualRedactions(),
|
||||
new NerEntities());
|
||||
document,
|
||||
dictionary,
|
||||
Collections.emptyList(),
|
||||
new ManualRedactions(),
|
||||
new NerEntities());
|
||||
System.out.printf("Firing rules took %d ms\n", System.currentTimeMillis() - droolsStart);
|
||||
|
||||
System.out.printf("Total time %d ms\n", System.currentTimeMillis() - dictionarySearchStart);
|
||||
@ -181,7 +181,8 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
float durationMillis = ((float) (System.currentTimeMillis() - start));
|
||||
System.out.printf("%d calls of buildTextBlock() on document took %f s, average is %f ms\n", n, durationMillis / 1000, durationMillis / n);
|
||||
|
||||
Section section = document.getMainSections().get(8);
|
||||
Section section = document.getMainSections()
|
||||
.get(8);
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = 0; i < n; i++) {
|
||||
section.getTextBlock();
|
||||
@ -245,11 +246,19 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
|
||||
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
|
||||
for (TextEntity entity : document.getEntities()) {
|
||||
var foundEntity = foundEntities.stream().filter(f -> f.getId().equals(entity.getId())).findFirst().get();
|
||||
var foundEntity = foundEntities.stream()
|
||||
.filter(f -> f.getId().equals(entity.getId()))
|
||||
.findFirst()
|
||||
.get();
|
||||
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
|
||||
}
|
||||
assert document.getEntities().stream().mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
|
||||
assert foundEntities.stream().map(TextEntity::getId).distinct().count() == document.getEntities().size();
|
||||
assert document.getEntities()
|
||||
.stream()
|
||||
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
|
||||
assert foundEntities.stream()
|
||||
.map(TextEntity::getId)
|
||||
.distinct()
|
||||
.count() == document.getEntities().size();
|
||||
drawAllEntities(filename, document);
|
||||
}
|
||||
|
||||
@ -265,9 +274,11 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(TextEntity::applied)
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
|
||||
.stream())
|
||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
|
||||
.flatMap(entityPosition -> entityPosition.getRectanglePerLine()
|
||||
.stream())
|
||||
.toList();
|
||||
|
||||
PdfVisualisationUtility.Options options = PdfVisualisationUtility.Options.builder().strokeColor(Color.BLACK).stroke(true).build();
|
||||
@ -279,9 +290,11 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(entityNode -> !entityNode.applied())
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
|
||||
.stream())
|
||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
|
||||
.flatMap(entityPosition -> entityPosition.getRectanglePerLine()
|
||||
.stream())
|
||||
.toList();
|
||||
|
||||
PdfVisualisationUtility.Options options = PdfVisualisationUtility.Options.builder().strokeColor(Color.BLUE).stroke(true).build();
|
||||
|
||||
@ -15,8 +15,17 @@ public class DocumentTableIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
public void testAllTableCellAccessesCorrect() {
|
||||
|
||||
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06");
|
||||
Page pageFive = document.getPages().stream().filter(pageNode -> pageNode.getNumber() == 5).findFirst().get();
|
||||
Table table = pageFive.getMainBody().stream().filter(semanticNode -> semanticNode instanceof Table).map(semanticNode -> (Table) semanticNode).findFirst().get();
|
||||
Page pageFive = document.getPages()
|
||||
.stream()
|
||||
.filter(pageNode -> pageNode.getNumber() == 5)
|
||||
.findFirst()
|
||||
.get();
|
||||
Table table = pageFive.getMainBody()
|
||||
.stream()
|
||||
.filter(semanticNode -> semanticNode instanceof Table)
|
||||
.map(semanticNode -> (Table) semanticNode)
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
for (int row = 0; row < table.getNumberOfRows(); row++) {
|
||||
for (int col = 0; col < table.getNumberOfCols(); col++) {
|
||||
|
||||
@ -83,8 +83,8 @@ public class DocumentVisualizationIntegrationTest extends BuildDocumentIntegrati
|
||||
|
||||
PdfVisualisationUtility.drawDocumentGraph(pdDocument, document);
|
||||
PdfVisualisationUtility.drawTextBlock(pdDocument,
|
||||
textBlock,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||
textBlock,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||
File outputFile = new File(tmpFileName);
|
||||
pdDocument.save(outputFile);
|
||||
}
|
||||
|
||||
@ -27,7 +27,8 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
SearchImplementation searchImplementation = new SearchImplementation(List.of("mydossierredaction"), true);
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList();
|
||||
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document)
|
||||
.toList();
|
||||
assertEquals(2, entities.size());
|
||||
}
|
||||
|
||||
|
||||
@ -79,6 +79,7 @@ class DroolsSyntaxValidationServiceTest {
|
||||
assertTrue(droolsSyntaxValidation.isCompiled());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
void testRulesWithAddedImports() {
|
||||
@ -96,6 +97,7 @@ class DroolsSyntaxValidationServiceTest {
|
||||
assertTrue(droolsSyntaxValidation.isCompiled());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
void testDocumineRules() {
|
||||
|
||||
@ -26,7 +26,9 @@ public class DroolsUpToDateTest {
|
||||
public void assertAllRuleFilesAreUpToDate() {
|
||||
|
||||
Path droolsPath = new ClassPathResource("drools").getFile().toPath();
|
||||
Files.walk(droolsPath).filter(DroolsUpToDateTest::isEntityRuleFile).forEach(this::validateFile);
|
||||
Files.walk(droolsPath)
|
||||
.filter(DroolsUpToDateTest::isEntityRuleFile)
|
||||
.forEach(this::validateFile);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -127,15 +127,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
@ -155,29 +155,39 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
String testEntityValue1 = "Desiree";
|
||||
String testEntityValue2 = "Melanie";
|
||||
EntityLog redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
|
||||
assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());
|
||||
assertEquals(2,
|
||||
redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue1))
|
||||
.count());
|
||||
assertEquals(2,
|
||||
redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue2))
|
||||
.count());
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
|
||||
String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
|
||||
entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get();
|
||||
entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document)
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
String idToResize = redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue1))
|
||||
.max(Comparator.comparingInt(EntityLogEntry::getStartOffset))
|
||||
.get()
|
||||
.getId();
|
||||
.get().getId();
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
manualRedactions.getResizeRedactions().add(ManualResizeRedaction.builder()
|
||||
.annotationId(idToResize)
|
||||
.value(expandedEntityKeyword)
|
||||
.positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(),
|
||||
Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build()))
|
||||
.addToAllDossiers(false)
|
||||
.updateDictionary(false)
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build());
|
||||
manualRedactions.getResizeRedactions()
|
||||
.add(ManualResizeRedaction.builder()
|
||||
.annotationId(idToResize)
|
||||
.value(expandedEntityKeyword)
|
||||
.positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(),
|
||||
Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build()))
|
||||
.addToAllDossiers(false)
|
||||
.updateDictionary(false)
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build());
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
@ -188,21 +198,32 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get();
|
||||
EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(expandedEntityKeyword))
|
||||
.findFirst()
|
||||
.get();
|
||||
assertEquals(idToResize, resizedEntry.getId());
|
||||
assertEquals(1, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
|
||||
assertEquals(1,
|
||||
redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED)).count());
|
||||
redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue1))
|
||||
.count());
|
||||
assertEquals(1,
|
||||
redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED))
|
||||
.count());
|
||||
}
|
||||
|
||||
|
||||
private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(),
|
||||
(float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
(float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
|
||||
@ -219,10 +240,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build()));
|
||||
manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder()
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Something")
|
||||
.build()));
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Something")
|
||||
.build()));
|
||||
|
||||
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
|
||||
manualRedactionEntry.setAnnotationId(manualAddId);
|
||||
@ -232,7 +253,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
manualRedactionEntry.setValue("O'Loughlin C.K.");
|
||||
manualRedactionEntry.setReason("Manual Redaction");
|
||||
manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
|
||||
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
|
||||
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
@ -242,11 +263,11 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build()));
|
||||
manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder()
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Manual Legal Basis Change")
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build())));
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Manual Legal Basis Change")
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build())));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
@ -295,7 +316,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
var optionalEntry = redactionLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findAny();
|
||||
var optionalEntry = redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))
|
||||
.findAny();
|
||||
assertTrue(optionalEntry.isPresent());
|
||||
assertEquals(2, optionalEntry.get().getContainingNodeId().size()); // 2 is the depth of the table instead of the table cell
|
||||
System.out.println("duration: " + (end - start));
|
||||
@ -345,9 +369,9 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
EntityLog redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertFalse(redactionLog2.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("published_information"))
|
||||
.anyMatch(entry -> entry.getValue().equals("Oxford University Press")));
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("published_information"))
|
||||
.anyMatch(entry -> entry.getValue().equals("Oxford University Press")));
|
||||
|
||||
var oxfordUniversityPressRecategorized = redactionLog2.getEntityLogEntry()
|
||||
.stream()
|
||||
@ -381,15 +405,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
String annotationId = "testAnnotationId";
|
||||
|
||||
manualRedactions.setEntriesToAdd(Set.of(ManualRedactionEntry.builder()
|
||||
.annotationId(annotationId)
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.type("manual")
|
||||
.value("Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated")
|
||||
.positions(List.of(//
|
||||
new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), //
|
||||
new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), //
|
||||
new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) //
|
||||
.build()));
|
||||
.annotationId(annotationId)
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.type("manual")
|
||||
.value("Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated")
|
||||
.positions(List.of(//
|
||||
new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), //
|
||||
new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), //
|
||||
new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) //
|
||||
.build()));
|
||||
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
|
||||
.annotationId(annotationId)
|
||||
.requestDate(OffsetDateTime.now())
|
||||
|
||||
@ -32,18 +32,31 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
public void manualResizeRedactionTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream()
|
||||
.filter(e -> e.getPages()
|
||||
.stream()
|
||||
.anyMatch(p -> p.getNumber() == 1))
|
||||
.findFirst()
|
||||
.get();
|
||||
TextEntity biggerEntity = biggerEntities.stream()
|
||||
.filter(e -> e.getPages()
|
||||
.stream()
|
||||
.anyMatch(p -> p.getNumber() == 1))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId();
|
||||
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
|
||||
.annotationId(initialId)
|
||||
.value(biggerEntity.getValue())
|
||||
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0)))
|
||||
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage()
|
||||
.get(0)))
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.updateDictionary(false)
|
||||
.build();
|
||||
@ -55,8 +68,13 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
assertTrue(Sets.difference(new HashSet<>(biggerEntity.getIntersectingNodes()), new HashSet<>(entity.getIntersectingNodes())).isEmpty());
|
||||
assertEquals(biggerEntity.getPages(), entity.getPages());
|
||||
assertEquals(biggerEntity.getValue(), entity.getValue());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine());
|
||||
assertEquals(initialId,
|
||||
entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId());
|
||||
assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage()
|
||||
.get(0).getRectanglePerLine(),
|
||||
entity.getPositionsOnPagePerPage()
|
||||
.get(0).getRectanglePerLine());
|
||||
assertTrue(entity.resized());
|
||||
}
|
||||
|
||||
@ -65,11 +83,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
public void manualForceRedactionTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream()
|
||||
.filter(e -> e.getPages()
|
||||
.stream()
|
||||
.anyMatch(p -> p.getNumber() == 1))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId();
|
||||
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build();
|
||||
|
||||
doAnalysis(document, List.of(manualForceRedaction));
|
||||
@ -78,8 +103,12 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
assertEquals(1, entity.getPages().size());
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertEquals("Something",
|
||||
entity.getManualOverwrite().getLegalBasis()
|
||||
.orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals(initialId,
|
||||
entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId());
|
||||
assertFalse(entity.removed());
|
||||
assertTrue(entity.hasManualChanges());
|
||||
assertTrue(entity.applied());
|
||||
@ -90,17 +119,26 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
public void manualIDRemovalTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream()
|
||||
.filter(e -> e.getPages()
|
||||
.stream()
|
||||
.anyMatch(p -> p.getNumber() == 1))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build();
|
||||
|
||||
doAnalysis(document, List.of(idRemoval));
|
||||
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertEquals(initialId,
|
||||
entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId());
|
||||
assertTrue(entity.ignored());
|
||||
}
|
||||
|
||||
@ -109,11 +147,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
public void manualIDRemovalButAlsoForceRedactionTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream()
|
||||
.filter(e -> e.getPages()
|
||||
.stream()
|
||||
.anyMatch(p -> p.getNumber() == 1))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build();
|
||||
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build();
|
||||
|
||||
@ -123,7 +168,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
assertEquals(1, entity.getPages().size());
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertEquals(initialId,
|
||||
entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId());
|
||||
assertFalse(entity.removed());
|
||||
assertFalse(entity.ignored());
|
||||
}
|
||||
@ -131,7 +178,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
|
||||
private void assertRectanglesAlmostEqual(Collection<Rectangle2D> rects1, Collection<Rectangle2D> rects2) {
|
||||
|
||||
if (rects1.stream().allMatch(rect1 -> rects2.stream().anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) {
|
||||
if (rects1.stream()
|
||||
.allMatch(rect1 -> rects2.stream()
|
||||
.anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) {
|
||||
return;
|
||||
}
|
||||
// use this for nice formatting of error message
|
||||
@ -143,15 +192,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
|
||||
double tolerance = 1e-1;
|
||||
return Math.abs(r1.getX() - r2.getX()) < tolerance &&//
|
||||
Math.abs(r1.getY() - r2.getY()) < tolerance &&//
|
||||
Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&//
|
||||
Math.abs(r1.getHeight() - r2.getHeight()) < tolerance;
|
||||
Math.abs(r1.getY() - r2.getY()) < tolerance &&//
|
||||
Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&//
|
||||
Math.abs(r1.getHeight() - r2.getHeight()) < tolerance;
|
||||
}
|
||||
|
||||
|
||||
private static List<Rectangle> toAnnotationRectangles(PositionOnPage positionsOnPage) {
|
||||
|
||||
return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList();
|
||||
return positionsOnPage.getRectanglePerLine()
|
||||
.stream()
|
||||
.map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber()))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -43,7 +43,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
OffsetDateTime start = OffsetDateTime.now();
|
||||
String reason = "whatever";
|
||||
Document document = buildGraphNoImages("files/new/crafted document.pdf");
|
||||
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList();
|
||||
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document)
|
||||
.peek(e -> e.apply("T.0.0", reason))
|
||||
.toList();
|
||||
assertFalse(entities.isEmpty());
|
||||
TextEntity entity = entities.get(0);
|
||||
assertTrue(entity.active());
|
||||
@ -52,7 +54,8 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
assertFalse(entity.resized());
|
||||
assertFalse(entity.ignored());
|
||||
assertEquals("n-a", entity.getMatchedRule().getLegalBasis());
|
||||
String annotationId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
String annotationId = entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId();
|
||||
|
||||
// remove first
|
||||
IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build();
|
||||
@ -73,7 +76,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
assertFalse(entity.ignored());
|
||||
assertFalse(entity.removed());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions());
|
||||
assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals("coolio",
|
||||
entity.getManualOverwrite().getLegalBasis()
|
||||
.orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
|
||||
// remove again
|
||||
IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).build();
|
||||
@ -93,7 +98,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
assertTrue(entity.ignored());
|
||||
assertFalse(entity.applied());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override",
|
||||
entity.buildReasonWithManualChangeDescriptions());
|
||||
entity.buildReasonWithManualChangeDescriptions());
|
||||
|
||||
String legalBasis = "Yeah";
|
||||
String section = "Some random section!";
|
||||
@ -110,10 +115,16 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
assertTrue(entity.ignored());
|
||||
assertFalse(entity.applied());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override, legal basis was manually changed",
|
||||
entity.buildReasonWithManualChangeDescriptions());
|
||||
assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue()));
|
||||
assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()));
|
||||
entity.buildReasonWithManualChangeDescriptions());
|
||||
assertEquals(value,
|
||||
entity.getManualOverwrite().getValue()
|
||||
.orElse(entity.getValue()));
|
||||
assertEquals(legalBasis,
|
||||
entity.getManualOverwrite().getLegalBasis()
|
||||
.orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals(section,
|
||||
entity.getManualOverwrite().getSection()
|
||||
.orElse(entity.getDeepestFullyContainingNode().toString()));
|
||||
|
||||
ManualRecategorization imageRecategorizationRequest = ManualRecategorization.builder()
|
||||
.type("type")
|
||||
@ -122,9 +133,13 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
.legalBasis("")
|
||||
.build();
|
||||
entity.getManualOverwrite().addChange(imageRecategorizationRequest);
|
||||
assertTrue(entity.getManualOverwrite().getRecategorized().isPresent());
|
||||
assertTrue(entity.getManualOverwrite().getRecategorized().get());
|
||||
assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.type()));
|
||||
assertTrue(entity.getManualOverwrite().getRecategorized()
|
||||
.isPresent());
|
||||
assertTrue(entity.getManualOverwrite().getRecategorized()
|
||||
.get());
|
||||
assertEquals("type",
|
||||
entity.getManualOverwrite().getType()
|
||||
.orElse(entity.type()));
|
||||
}
|
||||
|
||||
|
||||
@ -134,7 +149,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
OffsetDateTime start = OffsetDateTime.now();
|
||||
String reason = "whatever";
|
||||
Document document = buildGraphNoImages("files/new/crafted document.pdf");
|
||||
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document).peek(e -> e.apply("T.0.0", reason)).toList();
|
||||
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document)
|
||||
.peek(e -> e.apply("T.0.0", reason))
|
||||
.toList();
|
||||
assertFalse(entities.isEmpty());
|
||||
TextEntity entity = entities.get(0);
|
||||
assertTrue(entity.active());
|
||||
@ -143,7 +160,8 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
assertFalse(entity.resized());
|
||||
assertFalse(entity.ignored());
|
||||
assertEquals("n-a", entity.getMatchedRule().getLegalBasis());
|
||||
String annotationId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
String annotationId = entity.getPositionsOnPagePerPage()
|
||||
.get(0).getId();
|
||||
|
||||
// remove first
|
||||
IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build();
|
||||
@ -164,7 +182,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
assertFalse(entity.ignored());
|
||||
assertFalse(entity.removed());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions());
|
||||
assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals("coolio",
|
||||
entity.getManualOverwrite().getLegalBasis()
|
||||
.orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -122,17 +122,20 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
assertTrue(document.getEntities().isEmpty());
|
||||
|
||||
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(),
|
||||
document,
|
||||
TEST_DOSSIER_TEMPLATE_ID);
|
||||
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder()
|
||||
.entriesToAdd(Set.of(
|
||||
manualRedactionEntry))
|
||||
.build(),
|
||||
document,
|
||||
TEST_DOSSIER_TEMPLATE_ID);
|
||||
assertEquals(1, notFoundManualEntities.size());
|
||||
assertTrue(document.getEntities().isEmpty());
|
||||
|
||||
List<EntityLogEntry> redactionLogEntries = entityLogCreatorService.createInitialEntityLog(new AnalyzeRequest(),
|
||||
document,
|
||||
notFoundManualEntities,
|
||||
new DictionaryVersion(),
|
||||
0L).getEntityLogEntry();
|
||||
document,
|
||||
notFoundManualEntities,
|
||||
new DictionaryVersion(),
|
||||
0L).getEntityLogEntry();
|
||||
|
||||
assertEquals(1, redactionLogEntries.size());
|
||||
assertEquals(value, redactionLogEntries.get(0).getValue());
|
||||
@ -146,7 +149,8 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
|
||||
Document document = buildGraph("files/new/VV-919901.pdf");
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
|
||||
List<TextEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
|
||||
List<TextEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document)
|
||||
.toList();
|
||||
assertFalse(tempEntities.isEmpty());
|
||||
var tempEntity = tempEntities.get(0);
|
||||
List<Rectangle> positions = tempEntity.getPositionsOnPagePerPage()
|
||||
@ -172,21 +176,28 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
|
||||
tempEntity.removeFromGraph();
|
||||
assertTrue(document.getEntities().isEmpty());
|
||||
|
||||
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(),
|
||||
document,
|
||||
TEST_DOSSIER_TEMPLATE_ID);
|
||||
List<PrecursorEntity> notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder()
|
||||
.entriesToAdd(Set.of(
|
||||
manualRedactionEntry))
|
||||
.build(),
|
||||
document,
|
||||
TEST_DOSSIER_TEMPLATE_ID);
|
||||
assertTrue(notFoundManualEntities.isEmpty());
|
||||
assertEquals(1, document.getEntities().size());
|
||||
return new DocumentAndEntity(document, document.getEntities().stream().findFirst().get());
|
||||
return new DocumentAndEntity(document,
|
||||
document.getEntities()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.get());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -22,7 +22,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
|
||||
public static final String FILE_NAME = "test-file";
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testFile() {
|
||||
@ -41,13 +40,15 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
|
||||
redactionServiceSettings.setNerServiceEnabled(false);
|
||||
}
|
||||
|
||||
AnalyzeRequest ar = AnalyzeRequest.builder().fileId(FILE_ID).dossierId(DOSSIER_ID)
|
||||
.analysisNumber(1).dossierTemplateId(DOSSIER_TEMPLATE_ID)
|
||||
AnalyzeRequest ar = AnalyzeRequest.builder()
|
||||
.fileId(FILE_ID)
|
||||
.dossierId(DOSSIER_ID)
|
||||
.analysisNumber(1)
|
||||
.dossierTemplateId(DOSSIER_TEMPLATE_ID)
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.fileAttributes(List.of())
|
||||
.build();
|
||||
|
||||
|
||||
// try {
|
||||
// var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream();
|
||||
// } catch (Exception e) {
|
||||
|
||||
@ -159,14 +159,19 @@ public class LiveDataIntegrationTest {
|
||||
when(dictionaryClient.getAllTypesForDossier(anyString(), anyBoolean())).thenReturn(new ArrayList<>());
|
||||
|
||||
when(dictionaryClient.getColors(anyString())).thenReturn(objectMapper.readValue(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "colors.json").getInputStream(),
|
||||
Colors.class));
|
||||
Colors.class));
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
|
||||
String typeName = answer.getArgument(0);
|
||||
|
||||
var found = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst();
|
||||
var found = types.stream()
|
||||
.filter(t -> t.getType().equalsIgnoreCase(typeName))
|
||||
.findFirst();
|
||||
if (found.isPresent()) {
|
||||
var type = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().get();
|
||||
var type = types.stream()
|
||||
.filter(t -> t.getType().equalsIgnoreCase(typeName))
|
||||
.findFirst()
|
||||
.get();
|
||||
type.setEntries(getEntries(typeName, type.getTypeId()));
|
||||
type.setFalsePositiveEntries(getFalsePositiveEntries(typeName, type.getTypeId()));
|
||||
|
||||
@ -188,13 +193,20 @@ public class LiveDataIntegrationTest {
|
||||
when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
|
||||
String typeName = answer.getArgument(0);
|
||||
|
||||
var found = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst();
|
||||
var found = types.stream()
|
||||
.filter(t -> t.getType().equalsIgnoreCase(typeName))
|
||||
.findFirst();
|
||||
if (found.isPresent()) {
|
||||
|
||||
var type = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().get();
|
||||
var type = types.stream()
|
||||
.filter(t -> t.getType().equalsIgnoreCase(typeName))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
if (deltaTypeName.equalsIgnoreCase(typeName)) {
|
||||
type.setEntries(values.stream().map(l -> new DictionaryEntry(0, l, version, false, type.getTypeId())).collect(Collectors.toList()));
|
||||
type.setEntries(values.stream()
|
||||
.map(l -> new DictionaryEntry(0, l, version, false, type.getTypeId()))
|
||||
.collect(Collectors.toList()));
|
||||
} else {
|
||||
type.setEntries(new ArrayList<>());
|
||||
}
|
||||
@ -226,7 +238,9 @@ public class LiveDataIntegrationTest {
|
||||
if (Objects.requireNonNull(resource.getFilename()).contains(typeName) && !resource.getFilename().contains("false_positive")) {
|
||||
|
||||
List<String> lines = IOUtils.readLines(resource.getInputStream());
|
||||
return lines.stream().map(l -> new DictionaryEntry(0, l, 0L, false, typeId)).collect(Collectors.toList());
|
||||
return lines.stream()
|
||||
.map(l -> new DictionaryEntry(0, l, 0L, false, typeId))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
}
|
||||
@ -242,7 +256,9 @@ public class LiveDataIntegrationTest {
|
||||
if (Objects.requireNonNull(resource.getFilename()).contains(typeName) && resource.getFilename().contains("false_positive")) {
|
||||
|
||||
List<String> lines = IOUtils.readLines(resource.getInputStream());
|
||||
return lines.stream().map(l -> new DictionaryEntry(0, l, 0L, false, typeId)).collect(Collectors.toList());
|
||||
return lines.stream()
|
||||
.map(l -> new DictionaryEntry(0, l, 0L, false, typeId))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,13 +68,13 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
Document document = buildGraphNoImages(filePath);
|
||||
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document);
|
||||
assertFalse(entityRecognitionEntities.isEmpty());
|
||||
assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
|
||||
assertTrue(entityRecognitionEntities.stream()
|
||||
.allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
|
||||
|
||||
ClassPathResource resource = new ClassPathResource(filePath);
|
||||
try (PDDocument pdDocument = Loader.loadPDF(resource.getFile())) {
|
||||
|
||||
Stream<NerEntities.NerEntity> unchangedAddressParts = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
|
||||
.getNerEntityList()
|
||||
Stream<NerEntities.NerEntity> unchangedAddressParts = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document).getNerEntityList()
|
||||
.stream()
|
||||
.filter(e -> !e.type().equals("CBI_author"));
|
||||
List<TextEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
|
||||
@ -83,12 +83,15 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
redactionEntities.stream()
|
||||
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
|
||||
.collect(Collectors.groupingBy(e -> e.getPages()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.get().getNumber()))
|
||||
.forEach((pageNumber, entities) -> drawNerEntitiesAsPartsAndCombined(pageNumber,
|
||||
getPositionsFromEntityOfType("CBI_author", entities),
|
||||
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
|
||||
getPositionsFromEntityOfType("CBI_address", entities),
|
||||
pdDocument));
|
||||
getPositionsFromEntityOfType("CBI_author", entities),
|
||||
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
|
||||
getPositionsFromEntityOfType("CBI_address", entities),
|
||||
pdDocument));
|
||||
|
||||
File outputFile = new File("/tmp/nerEntities.pdf");
|
||||
pdDocument.save(outputFile);
|
||||
@ -109,7 +112,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
log.info("Parsed NerEntitiesModel");
|
||||
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
|
||||
log.info("Validated and mapped");
|
||||
List<TextRange> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList();
|
||||
List<TextRange> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.toList();
|
||||
log.info("Combined to CBI_address");
|
||||
List<TextEntity> cbiAddressEntities = nerEntityBoundaries.stream()
|
||||
.map(b -> entityCreationService.byTextRange(b, "CBI_address", EntityType.RECOMMENDATION, document))
|
||||
@ -117,25 +121,28 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
assertFalse(cbiAddressEntities.isEmpty());
|
||||
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end()));
|
||||
assertTrue(cbiAddressEntities.stream()
|
||||
.allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end()));
|
||||
|
||||
ClassPathResource resource = new ClassPathResource(filePath);
|
||||
try (PDDocument pdDocument = Loader.loadPDF(resource.getFile())) {
|
||||
|
||||
List<TextEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
|
||||
.getNerEntityList()
|
||||
List<TextEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document).getNerEntityList()
|
||||
.stream()
|
||||
.map(e -> entityCreationService.byTextRange(e.textRange(), e.type(), EntityType.ENTITY, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
Stream.concat(cbiAddressEntities.stream(), validatedEntities.stream())
|
||||
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
|
||||
.collect(Collectors.groupingBy(e -> e.getPages()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.get().getNumber()))
|
||||
.forEach((pageNumber, entities) -> drawNerEntitiesAsPartsAndCombined(pageNumber,
|
||||
getPositionsFromEntityOfType("CBI_author", entities),
|
||||
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
|
||||
getPositionsFromEntityOfType("CBI_address", entities),
|
||||
pdDocument));
|
||||
getPositionsFromEntityOfType("CBI_author", entities),
|
||||
getPositionsFromEntityNotOfType(List.of("CBI_author", "CBI_address"), entities),
|
||||
getPositionsFromEntityOfType("CBI_address", entities),
|
||||
pdDocument));
|
||||
|
||||
File outputFile = new File("/tmp/nerEntities.pdf");
|
||||
pdDocument.save(outputFile);
|
||||
@ -147,11 +154,13 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
|
||||
|
||||
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author").toList();
|
||||
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author")
|
||||
.toList();
|
||||
Stream<NerEntities.NerEntity> cbiAddress = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
|
||||
|
||||
return Stream.concat(cbiAuthors.stream(), cbiAddress).toList();
|
||||
return Stream.concat(cbiAuthors.stream(), cbiAddress)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -167,14 +176,17 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
private List<Rectangle2D> getPositionsFromEntityOfType(String type, List<TextEntity> entities) {
|
||||
|
||||
return getPositionsFromEntities(entities.stream().filter(e -> e.type().equals(type)));
|
||||
return getPositionsFromEntities(entities.stream()
|
||||
.filter(e -> e.type().equals(type)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<Rectangle2D> getPositionsFromEntityNotOfType(List<String> types, List<TextEntity> entities) {
|
||||
|
||||
return getPositionsFromEntities(entities.stream().filter(e -> types.stream().noneMatch(type -> e.type().equals(type))));
|
||||
return getPositionsFromEntities(entities.stream()
|
||||
.filter(e -> types.stream()
|
||||
.noneMatch(type -> e.type().equals(type))));
|
||||
|
||||
}
|
||||
|
||||
@ -188,13 +200,13 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, cbiAuthorRects, PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.blue).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
addressPartsRects,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.MAGENTA).build());
|
||||
pageNumber,
|
||||
addressPartsRects,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.MAGENTA).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
cbiAddressRects,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.green).build());
|
||||
pageNumber,
|
||||
cbiAddressRects,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).strokeColor(Color.green).build());
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -30,7 +30,13 @@ public class RegExPatternTest {
|
||||
@Test
|
||||
public void testEmailRegEx() {
|
||||
|
||||
String text = "Address: Schwarzwaldalle " + "P.O.Box\n" + "CH-4002 Basel\n" + "Switzerland\n" + "Contact: Christian Warmers\n" + "Tel: +41 (61) 323 8044\n" + "christian.warmers@syngenta.com";
|
||||
String text = "Address: Schwarzwaldalle "
|
||||
+ "P.O.Box\n"
|
||||
+ "CH-4002 Basel\n"
|
||||
+ "Switzerland\n"
|
||||
+ "Contact: Christian Warmers\n"
|
||||
+ "Tel: +41 (61) 323 8044\n"
|
||||
+ "christian.warmers@syngenta.com";
|
||||
|
||||
Pattern p = Pattern.compile("\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}\\b", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
@ -46,7 +52,31 @@ public class RegExPatternTest {
|
||||
@Test
|
||||
public void testEtAlRegEx() {
|
||||
|
||||
String text = "To assess the potential of S-metolachlor to cause endocrine disruption (ED) a review (Charlton 2014,\n" + "ASB2016-762) was submitted that summarises results from regulatory and open scientific literature\n" + "studies covering in vitro and in vivo studies (level 2-5 of the OECD Conceptual Framework). According to this information metolachlor increased (1.5-fold) aromatase activity in JEG-3 cells (Laville et al.\n" + "2006, ASB2010-14391) and induced weak anti-androgenic activity in the MDA-kb2 reporter cell line\n" + "with a IC50 of 9.92 µM (IC50 of positive control flutamide: 0.51 µM) (Aït-Aïssa et al. 2010, ASB2015-\n" + "9562). Data from the Tox21 high throughput screening revealed just few postive findings in assays to\n" + "identify antagonists of the androgen receptor. An isolated result of this screening showed agonistic\n" + "activity on the thyroid stimulating hormone receptor, while Dalton et al. (2003, ASB2018-2832)\n" + "demonstrated that metolachlor induced CYP2B1/2 and CYP3A1/2 but did not affect T4, T3 or TSH.\n" + "After prepubertal exposure of male Wistar rats to metolachlor (Mathias et al. 2012, ASB2016-9890) a\n" + "statistically significant increase of serum hormone concentration was observed for testosterone (at the\n" + "dose 50 mg/kg) as well as a statistically significant decrease in the age of preputial separation at a dose\n" + "of 5 and 50 mg/kg. Furthermore a statistically significant increase for estradiol at a dose of 50 mg/kg\n" + "and for FSH at a dose of 5 and 50 mg/kg and morphological alterations of the seminiferous epithelium\n" + "were observed. Relative testicular weight was not altered. A statistically significant increase of relative\n" + "weights was observed in long-term studies with rats (Tisdel et al. 1983, TOX9800328 ). This finding\n" + "was attributed to lower terminal body weight. In mice a statistically significant decrease of the weight\n" + "seminal vesicle (Tisdel et al. 1982, TOX9800327) was shown after 24 month treatment with\n" + "metolachlor. In a mouse preimplantation embryo assay from open literature metolachlor increased the\n" + "percentage of apoptosis significantly and reduced the mean number of cells per embryo significantly\n" + "while the percentage of developing blastocytes was unaltered (Grennlee et al. 2004, ASB2016-9889).\n" + "In reproduvtive toxicity studies a retarded body weight development of the pups was observed, while\n" + "survival and normal morphological and functional development were not altered. No adverse effects\n" + "on male fertility were seen, however important parameters to assess effects on female fertility like\n" + "cyclicity, ovarian follicles as well as developmental landmarks in the offspring have not been investigated.";
|
||||
String text = "To assess the potential of S-metolachlor to cause endocrine disruption (ED) a review (Charlton 2014,\n"
|
||||
+ "ASB2016-762) was submitted that summarises results from regulatory and open scientific literature\n"
|
||||
+ "studies covering in vitro and in vivo studies (level 2-5 of the OECD Conceptual Framework). According to this information metolachlor increased (1.5-fold) aromatase activity in JEG-3 cells (Laville et al.\n"
|
||||
+ "2006, ASB2010-14391) and induced weak anti-androgenic activity in the MDA-kb2 reporter cell line\n"
|
||||
+ "with a IC50 of 9.92 µM (IC50 of positive control flutamide: 0.51 µM) (Aït-Aïssa et al. 2010, ASB2015-\n"
|
||||
+ "9562). Data from the Tox21 high throughput screening revealed just few postive findings in assays to\n"
|
||||
+ "identify antagonists of the androgen receptor. An isolated result of this screening showed agonistic\n"
|
||||
+ "activity on the thyroid stimulating hormone receptor, while Dalton et al. (2003, ASB2018-2832)\n"
|
||||
+ "demonstrated that metolachlor induced CYP2B1/2 and CYP3A1/2 but did not affect T4, T3 or TSH.\n"
|
||||
+ "After prepubertal exposure of male Wistar rats to metolachlor (Mathias et al. 2012, ASB2016-9890) a\n"
|
||||
+ "statistically significant increase of serum hormone concentration was observed for testosterone (at the\n"
|
||||
+ "dose 50 mg/kg) as well as a statistically significant decrease in the age of preputial separation at a dose\n"
|
||||
+ "of 5 and 50 mg/kg. Furthermore a statistically significant increase for estradiol at a dose of 50 mg/kg\n"
|
||||
+ "and for FSH at a dose of 5 and 50 mg/kg and morphological alterations of the seminiferous epithelium\n"
|
||||
+ "were observed. Relative testicular weight was not altered. A statistically significant increase of relative\n"
|
||||
+ "weights was observed in long-term studies with rats (Tisdel et al. 1983, TOX9800328 ). This finding\n"
|
||||
+ "was attributed to lower terminal body weight. In mice a statistically significant decrease of the weight\n"
|
||||
+ "seminal vesicle (Tisdel et al. 1982, TOX9800327) was shown after 24 month treatment with\n"
|
||||
+ "metolachlor. In a mouse preimplantation embryo assay from open literature metolachlor increased the\n"
|
||||
+ "percentage of apoptosis significantly and reduced the mean number of cells per embryo significantly\n"
|
||||
+ "while the percentage of developing blastocytes was unaltered (Grennlee et al. 2004, ASB2016-9889).\n"
|
||||
+ "In reproduvtive toxicity studies a retarded body weight development of the pups was observed, while\n"
|
||||
+ "survival and normal morphological and functional development were not altered. No adverse effects\n"
|
||||
+ "on male fertility were seen, however important parameters to assess effects on female fertility like\n"
|
||||
+ "cyclicity, ovarian follicles as well as developmental landmarks in the offspring have not been investigated.";
|
||||
|
||||
Pattern p = Pattern.compile("([^\\s(]*?( \\w\\.?)?) et al\\.?");
|
||||
|
||||
@ -64,7 +94,6 @@ public class RegExPatternTest {
|
||||
|
||||
String word = "Porch JR, " + "Kendall TZ, " + "Krueger HO";
|
||||
|
||||
|
||||
Pattern pattern = Pattern.compile("[A-ZÄÖÜ][\\wäöüéèê]{2,}( [A-ZÄÖÜ]{1,2}\\.)+");
|
||||
Matcher matcher = pattern.matcher(word);
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user