diff --git a/publish-custom-image.sh b/publish-custom-image.sh
index 89b9f8d9..08f7342d 100755
--- a/publish-custom-image.sh
+++ b/publish-custom-image.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
+# Abort as soon as any command fails.
+set -e
+
 dir=${PWD##*/}
+
 gradle assemble
 
 # Get the current Git branch
@@ -11,5 +15,45 @@ commit_hash=$(git rev-parse --short=5 HEAD)
 
 # Combine branch and commit hash
 buildName="${USER}-${branch}-${commit_hash}"
-gradle bootBuildImage --cleanCache --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName
-echo "nexus.knecon.com:5001/red/${dir}-server-v1:$buildName"
+gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true "-Pversion=${buildName}"
+
+newImageName="nexus.knecon.com:5001/red/${dir}-server-v1:${buildName}"
+
+echo "full image name:"
+echo "${newImageName}"
+echo ""
+# Without a namespace argument the script only builds and publishes the image.
+if [ -z "$1" ]; then
+  exit 0
+fi
+
+namespace="${1}"
+deployment_name="redaction-service-v1"
+deployment_name2="redaction-priority-service-v1"
+
+echo "deploying to ${namespace}"
+# Restart the rollout when the image is unchanged; otherwise point the deployment at the new image.
+oldImageName=$(rancher kubectl -n "${namespace}" get deployment "${deployment_name}" -o=jsonpath='{.spec.template.spec.containers[*].image}')
+
+if [ "${newImageName}" = "${oldImageName}" ]; then
+  echo "Image tag of ${deployment_name} did not change, redeploying..."
+  rancher kubectl rollout restart deployment "${deployment_name}" -n "${namespace}"
+else
+  echo "upgrading the image tag of ${deployment_name}..."
+  rancher kubectl set image "deployment/${deployment_name}" "${deployment_name}=${newImageName}" -n "${namespace}"
+fi
+# Same procedure for the second deployment.
+oldImageName=$(rancher kubectl -n "${namespace}" get deployment "${deployment_name2}" -o=jsonpath='{.spec.template.spec.containers[*].image}')
+
+if [ "${newImageName}" = "${oldImageName}" ]; then
+  echo "Image tag of ${deployment_name2} did not change, redeploying..."
+  rancher kubectl rollout restart deployment "${deployment_name2}" -n "${namespace}"
+else
+  echo "upgrading the image tag of ${deployment_name2}..."
+  rancher kubectl set image "deployment/${deployment_name2}" "${deployment_name2}=${newImageName}" -n "${namespace}"
+fi
+rancher kubectl rollout status deployment "${deployment_name}" -n "${namespace}"
+echo "Deployed ${deployment_name}:${buildName} to ${namespace}"
+
+rancher kubectl rollout status deployment "${deployment_name2}" -n "${namespace}"
+echo "Deployed ${deployment_name2}:${buildName} to ${namespace}"
diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 80e49ee9..54c5aac3 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -16,7 +16,7 @@ val layoutParserVersion = "0.131.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.420.0" +val persistenceServiceVersion = "2.429.0" val springBootStarterVersion = "3.1.5" val springCloudVersion = "4.0.4" val testContainersVersion = "1.19.7" @@ -65,6 +65,7 @@ dependencies { implementation("org.reflections:reflections:0.10.2") + implementation("com.opencsv:opencsv:5.9") implementation("com.joestelmach:natty:0.13") testImplementation(project(":rules-management")) @@ -99,8 +100,6 @@ tasks.test { } - - tasks.named("bootBuildImage") { environment.put("BPE_DELIM_JAVA_TOOL_OPTIONS", " ") diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentMapping.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentMapping.java new file mode 100644 index 00000000..5103f924 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentMapping.java @@ -0,0 +1,192 @@ +package
com.iqser.red.service.redaction.v1.server.model.component;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.stream.Stream;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.service.redaction.v1.server.utils.QueryParser;
+
+import lombok.AccessLevel;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Getter;
+import lombok.experimental.FieldDefaults;
+import lombok.experimental.SuperBuilder;
+import lombok.extern.slf4j.Slf4j;
+/** A component mapping: its metadata plus the data rows (one String array per row), queryable by column. */
+@Slf4j
+@Getter
+@SuperBuilder
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ComponentMapping implements Queryable {
+    // Orders rows by their first column. NOTE(review): the binary searches below require data to be sorted this way; nothing in this class sorts it - confirm the producer guarantees it.
+    static Comparator<String[]> PRIMARY_KEY_COMPARATOR = Comparator.comparing(s -> s[0]);
+    ComponentMappingMetadata metadata;
+    List<String[]> data;
+
+    /** Creates a mapping with the given name, version -1, no column labels and no rows. */
+    public static ComponentMapping empty(String name) {
+
+        return ComponentMapping.builder()
+                .metadata(ComponentMappingMetadata.builder().version(-1).name(name).columnLabels(Collections.emptyList()).build())
+                .data(Collections.emptyList())
+                .build();
+    }
+
+    /** Uses a binary search when the query is an EQUALS on the first column; otherwise falls back to the generic filter of {@link Queryable}. */
+    @Override
+    public Query where(String query) {
+
+        ComponentMappingQueryParameters queryParameters = QueryParser.parse(query, getMetaData().getColumnLabels());
+        if (queryParameters.columnIdx() == 0 && queryParameters.operator().equals(QueryOperator.EQUALS)) {
+            return wherePrimaryKey(queryParameters);
+        }
+        return Queryable.super.where(query);
+    }
+
+
+    /**
+     * Determines if the mapping contains at least one row, where the first column is equal to the provided query string.
+     *
+     * @param query String to search for
+     * @return true, if the mapping has at least one row, which has the query string as its first value.
+     */
+    public boolean existsByFirstColumn(String query) {
+
+        int idx = Collections.binarySearch(data, new String[]{query}, PRIMARY_KEY_COMPARATOR);
+        return idx >= 0;
+    }
+
+    /** Collects all rows whose first column equals the query value, locating them with a binary search. */
+    private Query wherePrimaryKey(ComponentMappingQueryParameters queryParameters) {
+
+        // find any element which fulfills query
+        int idx = Collections.binarySearch(data, new String[]{queryParameters.query()}, PRIMARY_KEY_COMPARATOR);
+        if (idx < 0) {
+            return new Query(getMetaData(), Stream.empty());
+        }
+
+        // find first element which fulfills query
+        while (idx > 0 && data.get(idx - 1)[0].equals(queryParameters.query())) {
+            idx--;
+        }
+
+        // find all elements which fulfill query; the bounds check stops the scan when the matches reach the last row
+        List<String[]> result = new LinkedList<>();
+        while (idx < data.size() && data.get(idx)[0].equals(queryParameters.query())) {
+            result.add(data.get(idx));
+            idx++;
+        }
+
+        return new Query(getMetaData(), result.stream());
+    }
+
+
+    /**
+     * Gets all data of this mapping.
+     *
+     * @return a stream of string arrays representing the data of this component mapping
+     */
+    @Override
+    public Stream<String[]> getData() {
+
+        return data.stream();
+    }
+
+
+    @Override
+    public ComponentMappingMetadata getMetaData() {
+
+        return metadata;
+    }
+
+    /** Result of a query: the metadata of the mapping plus the stream of rows that passed the filters applied so far. */
+    @Builder
+    @AllArgsConstructor
+    public static class Query implements Queryable {
+
+        ComponentMappingMetadata metadata;
+        Stream<String[]> filteredData;
+
+
+        @Override
+        public Stream<String[]> getData() {
+
+            return filteredData;
+        }
+
+
+        @Override
+        public ComponentMappingMetadata getMetaData() {
+
+            return metadata;
+        }
+
+
+        /**
+         * Selects any number of fields from the data.
+         * If a string does not match any of the column labels, it is ignored.
+         *
+         * @param selects any number of column labels
+         * @return Stream of string arrays, dimension matches number of valid column labels.
+         */
+        public Stream<String[]> select(String... selects) {
+
+            int[] selectedColumns = mapStringsToColumnLabels(selects);
+
+            return filteredData.map(stringArray -> sliceArray(stringArray, selectedColumns));
+        }
+
+
+        /**
+         * Selects a single field, identified by its column label.
+         * If the column label is not present in the data, an empty result will be returned.
+         * If all fields should be retrieved, use {@link #getData()}
+         *
+         * @param selector a single column label
+         * @return Stream of strings matching the previous queries
+         */
+        public Stream<String> select(String selector) {
+
+            int selectedColumn = getMetaData().getColumnLabels().indexOf(selector);
+            if (selectedColumn < 0) {
+                log.info("String {} not found in column labels.", selector);
+                return Stream.empty();
+            }
+
+            return filteredData.map(stringArray -> stringArray[selectedColumn]);
+        }
+
+        /** Translates column labels into column indices; unknown labels are logged and skipped. */
+        private int[] mapStringsToColumnLabels(String[] selects) {
+
+            List<Integer> selectedColumns = new LinkedList<>();
+            for (int i = 0; i < selects.length; i++) {
+                int column = getMetaData().getColumnLabels().indexOf(selects[i]);
+                if (column < 0) {
+                    log.info("Could not find column label: {}", selects[i]);
+                    continue;
+                }
+                selectedColumns.add(column);
+            }
+            return selectedColumns.stream()
+                    .mapToInt(Integer::intValue)
+                    .toArray();
+        }
+
+        /** Copies the requested columns of one row into a new array, in the requested order. */
+        private String[] sliceArray(String[] strings, int[] columns) {
+
+            String[] slicedArray = new String[columns.length];
+            for (int i = 0; i < columns.length; i++) {
+                slicedArray[i] = strings[columns[i]];
+            }
+            return slicedArray;
+        }
+
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentMappingQueryParameters.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentMappingQueryParameters.java
new file mode 100644
index 00000000..6d61fd1f
--- /dev/null
+++
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentMappingQueryParameters.java
@@ -0,0 +1,10 @@
+package com.iqser.red.service.redaction.v1.server.model.component;
+/** Parsed form of a query: the index of the column to test, the operator to apply and the value to compare against. */
+public record ComponentMappingQueryParameters(int columnIdx, QueryOperator operator, String query) {
+    /** Returns parameters carrying the INVALID operator (column 0, empty query). */
+    public static ComponentMappingQueryParameters invalid() {
+
+        return new ComponentMappingQueryParameters(0, QueryOperator.INVALID, "");
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/QueryOperator.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/QueryOperator.java
new file mode 100644
index 00000000..89ee1b4b
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/QueryOperator.java
@@ -0,0 +1,42 @@
+package com.iqser.red.service.redaction.v1.server.model.component;
+
+import java.util.Locale;
+
+/**
+ * Enum describing the different types of allowed queries. <br>
+ * EQUALS requires the query to match exactly, <br>
+ * CONTAINS requires the query to partially match the field. <br>
+ * MATCHES assumes the query to be a regular expression and requires the string to match the regex. <br>
+ * SOUNDSLIKE requires the query to match the field with SOUNDEX based matching. <br>
+ * INVALID never matches. <br>
+ */
+public enum QueryOperator {
+    EQUALS,
+    CONTAINS,
+    MATCHES,
+    SOUNDSLIKE,
+    INVALID;
+
+
+    /**
+     * Determines the operator from the query string. It's case-insensitive. <br>
+     * "=", "equals" -> EQUALS <br>
+     * "contains" -> CONTAINS <br>
+     * "matches" -> MATCHES <br>
+     * "soundslike" -> SOUNDSLIKE <br>
+     * If none of the above options matches, an INVALID operator will be returned. <br>
+     *
+     * @param name String naming the operator
+     * @return the operator parsed from the name
+     */
+    public static QueryOperator fromString(String name) {
+
+        return switch (name.toLowerCase(Locale.US)) {
+            case "=", "equals" -> EQUALS;
+            case "contains" -> CONTAINS;
+            case "matches" -> MATCHES;
+            case "soundslike" -> SOUNDSLIKE;
+            default -> INVALID;
+        };
+    }
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Queryable.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Queryable.java
new file mode 100644
index 00000000..223a7593
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Queryable.java
@@ -0,0 +1,89 @@
+package com.iqser.red.service.redaction.v1.server.model.component;
+
+import java.util.stream.Stream;
+
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.language.Soundex;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.service.redaction.v1.server.utils.Patterns;
+import com.iqser.red.service.redaction.v1.server.utils.QueryParser;
+
+public interface Queryable {
+    // Encoder used by the SOUNDSLIKE operator; as an interface field it is a single shared constant.
+    Soundex soundex = new Soundex();
+
+
+    /**
+     * Returns the already filtered data of this queryable object.
+     *
+     * @return the queried data as a stream
+     */
+    Stream<String[]> getData();
+
+
+    /**
+     * Returns the metadata associated with this component mapping.
+     *
+     * @return metadata of the component mapping
+     */
+    ComponentMappingMetadata getMetaData();
+
+
+    /**
+     * Queries the data using the provided query. <br>
+     * A simple query string might look something like this: <br>
+     * number = 10 <br>
+     * "number" must match one of the column labels exactly, "=" describes the operator, and "10" the value. <br>
+     * Supported operators: <br>
+     * - =, equals <br>
+     * - contains <br>
+     * - matches <br>
+     * - soundslike <br>
+     * For more details, see: {@link com.iqser.red.service.redaction.v1.server.model.component.QueryOperator#fromString QueryOperator}. <br>
+     * Note that all strings containing whitespace must be enclosed in the char ' as such: <br>
+     * name contains 'John Doe' <br>
+     * If the query is supposed to contain the character ', the character must be escaped using \. e.g. <br>
+     * name = Peter\\'s <br>
+     * The escape character \ needs to be escaped itself due to java's string handling. <br>
+     * When performance is of concern due to very large component mapping files, the first query should use the EQUALS operator and be performed on the first column of the CSV. <br>
+     * Then a binary search will be performed, significantly speeding up the first lookup. <br>
+     * <br>
+     *
+     * @param query a String describing the query to be performed on the data of this queryable object.
+     * @return a Query containing the queried data as a Stream of String arrays.
+     */
+    default ComponentMapping.Query where(String query) {
+
+        ComponentMappingQueryParameters queryParameters = QueryParser.parse(query, getMetaData().getColumnLabels());
+
+        Stream<String[]> filteredData = getData();
+
+        filteredData = filteredData.filter(columns -> matches(columns[queryParameters.columnIdx()], queryParameters));
+
+        return new ComponentMapping.Query(getMetaData(), filteredData);
+    }
+
+    /** Applies the operator of the parsed query to a single field value; INVALID never matches. */
+    private boolean matches(String string, ComponentMappingQueryParameters parameters) {
+
+        return switch (parameters.operator()) {
+            case EQUALS -> string.equals(parameters.query());
+            case CONTAINS -> string.contains(parameters.query());
+            case MATCHES -> Patterns.getCompiledPattern(parameters.query(), false).matcher(string).matches();
+            case SOUNDSLIKE -> soundslike(string, parameters.query());
+            case INVALID -> false;
+        };
+    }
+
+    /** True when both strings have the same Soundex code; falls back to plain equality when encoding fails. */
+    private static boolean soundslike(String string, String query) {
+
+        try {
+            return soundex.difference(string, query) == 4; // difference() is a similarity score: the number of equal Soundex code characters (0-4), 4 meaning identical codes
+        } catch (EncoderException e) {
+            return string.equals(query);
+        }
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java index 0dfc4c95..42862452 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Locale; import java.util.regex.Pattern; import
java.util.stream.Collectors; +import java.util.stream.Stream; import org.ahocorasick.trie.Trie; @@ -103,21 +104,19 @@ public class SearchImplementation { } - public List getBoundaries(CharSequence text, TextRange region) { + public Stream getBoundaries(CharSequence text, TextRange region) { if (this.values.isEmpty()) { - return new ArrayList<>(); + return Stream.empty(); } CharSequence subSequence = text.subSequence(region.start(), region.end()); if (this.pattern != null) { return this.pattern.matcher(subSequence).results() - .map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())) - .collect(Collectors.toList()); + .map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())); } else { return this.trie.parseText(subSequence) .stream() - .map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)) - .collect(Collectors.toList()); + .map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java index 4216471c..04d51c17 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java @@ -55,7 +55,7 @@ public class Page { return mainBody.stream() .filter(SemanticNode::isLeaf) - .map(SemanticNode::getLeafTextBlock) + .map(SemanticNode::getTextBlock) .collect(new TextBlockCollector()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java index 89c45693..b8a96db4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java @@ -141,7 +141,11 @@ public interface TextBlock extends CharSequence { default String buildSummary() { - String[] words = getSearchText().split(" "); + String searchText = getSearchText(); + // substring, as splitting very large strings gets expensive + searchText = searchText.substring(0, Math.min(searchText.length(), 200)); + + String[] words = searchText.split(" "); int bound = Math.min(words.length, 4); List list = new ArrayList<>(Arrays.asList(words).subList(0, bound)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java index 6ea05446..226ded0a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java @@ -35,6 +35,7 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncr import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.service.components.ComponentLogCreatorService; 
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService; import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService; @@ -272,11 +273,8 @@ public class AnalyzeService { } log.info("Created entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - if (entityLogChanges.hasChanges() || !isReanalysis) { - computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, document, addedFileAttributes, entityLog); - } - log.info("Stored analysis logs for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, document, addedFileAttributes, entityLog); long duration = System.currentTimeMillis() - startTime; @@ -298,6 +296,7 @@ public class AnalyzeService { .wasReanalyzed(isReanalysis) .manualRedactions(analyzeRequest.getManualRedactions()) .addedFileAttributes(addedFileAttributes) + .usedComponentMappings(analyzeRequest.getComponentMappings()) .build(); } @@ -319,13 +318,16 @@ public class AnalyzeService { entityLog, document, addedFileAttributes.stream() - .toList()); + .toList(), + analyzeRequest.getComponentMappings()); + log.info("Finished component rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); ComponentLog componentLog = componentLogCreatorService.buildComponentLog(analyzeRequest.getAnalysisNumber(), components, kieWrapperComponentRules.rulesVersion()); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.COMPONENT_LOG, componentLog); + log.info("Stored component log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java index 9610db6e..06d8237a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java @@ -59,7 +59,6 @@ public class DictionarySearchService { Set engines = isDossierDictionaryEntry ? Set.of(Engine.DOSSIER_DICTIONARY) : Set.of(Engine.DICTIONARY); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) - .stream() .filter(boundary -> entityCreationService.isValidEntityTextRange(node.getTextBlock(), boundary)) .forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, engines) .ifPresent(entity -> { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java similarity index 98% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java index 1fbdc90e..54af5c5f 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java @@ -1,4 +1,4 @@ -package com.iqser.red.service.redaction.v1.server.service; +package com.iqser.red.service.redaction.v1.server.service.components; import java.util.ArrayList; import java.util.HashMap;
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingFileSystemCache.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingFileSystemCache.java
new file mode 100644
index 00000000..dd348932
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingFileSystemCache.java
@@ -0,0 +1,123 @@
+package com.iqser.red.service.redaction.v1.server.service.components;
+
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.springframework.core.io.InputStreamResource;
+import org.springframework.stereotype.Service;
+import org.springframework.util.FileSystemUtils;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.storage.commons.service.StorageService;
+import com.knecon.fforesight.tenantcommons.TenantContext;
+
+import jakarta.annotation.PreDestroy;
+import lombok.AccessLevel;
+import lombok.SneakyThrows;
+import lombok.experimental.FieldDefaults;
+import lombok.extern.slf4j.Slf4j;
+/** Keeps component mapping files, each with a JSON metadata file next to it, in a temporary directory that is deleted on shutdown. */
+@Slf4j
+@Service
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ComponentMappingFileSystemCache {
+
+    static String METADATA_SUFFIX = ".metadata.json";
+
+    StorageService storageService;
+    ObjectMapper mapper;
+    Path mappingFileDir;
+
+    /** Returns the local copy of the mapping file, fetching it from storage first when it is missing or older than the requested version. */
+    @SneakyThrows
+    public File getComponentMappingFile(ComponentMappingMetadata metadata) {
+
+        Path mappingFile = getMappingFileFromMetadata(metadata);
+        Path mappingFileMetaDataFile = mappingFile.resolveSibling(metadata.getName() + METADATA_SUFFIX);
+        synchronized (ComponentMappingFileSystemCache.class) {
+
+            if (fileExistsAndUpToDate(metadata, mappingFile, mappingFileMetaDataFile)) {
+                return mappingFile.toFile();
+            }
+
+            Files.createDirectories(mappingFile.getParent());
+
+            InputStreamResource inputStreamResource = storageService.getObject(TenantContext.getTenantId(), metadata.getStorageId());
+
+            Files.write(mappingFile, inputStreamResource.getContentAsByteArray(), StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE);
+
+            mapper.writeValue(mappingFileMetaDataFile.toFile(), metadata);
+
+            return mappingFile.toFile();
+        }
+    }
+
+    /** Lists the cached mappings found below the current tenant's directory for the given dossier template, keyed by their metadata. */
+    @SneakyThrows
+    public Map<ComponentMappingMetadata, Path> getAllCachedMappings(String dossierTemplateId) {
+
+        Path tenantMappingFileDir = mappingFileDir.resolve(TenantContext.getTenantId()).resolve(dossierTemplateId);
+
+        synchronized (ComponentMappingFileSystemCache.class) {
+
+            if (!tenantMappingFileDir.toFile().exists()) {
+                return Collections.emptyMap();
+            }
+
+            try (var pathStream = Files.walk(tenantMappingFileDir)) {
+                return pathStream.filter(path -> path.getFileName().toString().endsWith(METADATA_SUFFIX))
+                        .map(this::readMetaDataOnDisk)
+                        .collect(Collectors.toMap(Function.identity(), this::getMappingFileFromMetadata));
+            }
+        }
+    }
+
+    /** Resolves the cache location of a mapping file: cache directory / tenant id / storage id. */
+    private Path getMappingFileFromMetadata(ComponentMappingMetadata metadata) {
+
+        Path tenantStem = mappingFileDir.resolve(TenantContext.getTenantId());
+        return tenantStem.resolve(metadata.getStorageId());
+    }
+
+    /** True when both the mapping file and its metadata file exist and the version on disk is not older than the requested one. */
+    private boolean fileExistsAndUpToDate(ComponentMappingMetadata metadata, Path mappingFile, Path mappingFileMetaDataFile) {
+
+        if (mappingFile.toFile().exists() && mappingFile.toFile().isFile() && mappingFileMetaDataFile.toFile().exists() && mappingFileMetaDataFile.toFile().isFile()) {
+            ComponentMappingMetadata metaDataOnDisk = readMetaDataOnDisk(mappingFileMetaDataFile);
+            return metaDataOnDisk.getVersion() >= metadata.getVersion();
+        }
+        return false;
+    }
+
+
+    @SneakyThrows
+    private ComponentMappingMetadata readMetaDataOnDisk(Path mappingFileMetaDataFile) {
+
+        return mapper.readValue(mappingFileMetaDataFile.toFile(), ComponentMappingMetadata.class);
+    }
+
+    /** Creates the temporary directory that backs this cache. */
+    @SneakyThrows
+    public ComponentMappingFileSystemCache(StorageService storageService, ObjectMapper mapper) {
+
+        this.storageService = storageService;
+        this.mapper = mapper;
+        this.mappingFileDir = Files.createTempDirectory("componentMappingCache");
+    }
+
+
+    @PreDestroy
+    @SneakyThrows
+    public void cleanUp() {
+
+        FileSystemUtils.deleteRecursively(mappingFileDir);
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingMemoryCache.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingMemoryCache.java
new file mode 100644
index 00000000..eca6b144
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingMemoryCache.java
@@ -0,0 +1,99 @@
+package com.iqser.red.service.redaction.v1.server.service.components;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.Weigher;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.service.redaction.v1.server.model.component.ComponentMapping;
+import com.knecon.fforesight.tenantcommons.TenantContext;
+import com.opencsv.CSVParserBuilder;
+import com.opencsv.CSVReader;
+import com.opencsv.CSVReaderBuilder;
+
+import lombok.AccessLevel;
+import lombok.SneakyThrows;
+import lombok.experimental.FieldDefaults;
+/** In-memory cache of parsed component mappings, weighted by the number of lines recorded in each mapping's metadata. */
+@Service
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ComponentMappingMemoryCache {
+
+    static Weigher<String, ComponentMapping> COMPONENT_MAPPING_WEIGHER = (String ignored, ComponentMapping mapping) -> mapping.getMetaData().getNumberOfLines();
+    public static int MAX_NUMBER_OF_LINES = 1000000;
+
+    Cache<String, ComponentMapping> cache; // key is TenantId/StorageId
+    ComponentMappingFileSystemCache fileSystemCache;
+
+
+    public ComponentMappingMemoryCache(ComponentMappingFileSystemCache componentMappingFileSystemCache) {
+
+        this.fileSystemCache = componentMappingFileSystemCache;
+        cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).build();
+    }
+
+    /** Returns the cached mapping, loading it again when it is absent or older than the requested version. */
+    public ComponentMapping getComponentMapping(ComponentMappingMetadata metadata) {
+
+        String cacheKey = buildCacheKey(metadata);
+        ComponentMapping mapping = cache.getIfPresent(cacheKey);
+        if (mapping == null) {
+            return loadMapping(cacheKey, metadata);
+        }
+        if (mapping.getMetaData().getVersion() < metadata.getVersion()) {
+            cache.invalidate(cacheKey);
+            return loadMapping(cacheKey, metadata);
+        }
+        return mapping;
+    }
+
+
+    public long numberOfCachedMappings() {
+
+        return cache.size();
+    }
+
+    /** Obtains the mapping file through the file system cache, parses it and stores the result under the given key. */
+    private ComponentMapping loadMapping(String cacheKey, ComponentMappingMetadata metadata) {
+
+        File mappingFile = this.fileSystemCache.getComponentMappingFile(metadata);
+        ComponentMapping mapping = loadMappingFromFile(mappingFile, metadata);
+        cache.put(cacheKey, mapping);
+        return mapping;
+    }
+
+    /** Parses the CSV file with the encoding and delimiter from the metadata; the first row is treated as the header and dropped. */
+    @SneakyThrows
+    public static ComponentMapping loadMappingFromFile(File mappingFile, ComponentMappingMetadata metadata) {
+
+        Charset charSet = Charset.forName(metadata.getEncoding());
+        List<String[]> data;
+        try (Reader fileReader = new FileReader(mappingFile, charSet); CSVReader reader = buildReader(fileReader, metadata.getDelimiter())) {
+            List<String[]> rawData = reader.readAll();
+            if (!rawData.isEmpty()) { rawData.remove(0); } // remove header row; an empty file has none and yields an empty mapping
+            data = rawData;
+        }
+        return ComponentMapping.builder().metadata(metadata).data(data).build();
+    }
+
+
+    @SneakyThrows
+    private static CSVReader buildReader(Reader reader, char delimiter) {
+
+        return new CSVReaderBuilder(reader).withCSVParser(new CSVParserBuilder().withSeparator(delimiter).build()).build();
+    }
+
+    /** Builds the cache key from the current tenant id and the storage id of the mapping. */
+    private static String buildCacheKey(ComponentMappingMetadata metadata) {
+
+        return TenantContext.getTenantId() + "/" + metadata.getStorageId();
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingService.java
new file mode 100644
index 00000000..b53dbe77
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentMappingService.java
@@ -0,0 +1,35 @@
+package com.iqser.red.service.redaction.v1.server.service.components;
+
+import java.util.List;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.service.redaction.v1.server.model.component.ComponentMapping;
+
+import lombok.AccessLevel;
+import lombok.RequiredArgsConstructor;
+import lombok.experimental.FieldDefaults;
+/** Looks up component mappings by name among the mappings made available to it. */
+@RequiredArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ComponentMappingService {
+
+    ComponentMappingMemoryCache mappingCache;
+    List<ComponentMappingMetadata> availableMappings;
+
+
+    /**
+     * Retrieves the mapping by the specified name. If the name does not match any of the available mappings, an empty dummy mapping is returned.
+     *
+     * @param name the name of the mapping to be retrieved
+     * @return A ComponentMapping with the specified name.
+     */
+    public ComponentMapping from(String name) {
+
+        return availableMappings.stream()
+                .filter(mapping -> mapping.getName().equals(name))
+                .findFirst()
+                .map(mappingCache::getComponentMapping)
+                .orElseGet(() -> ComponentMapping.empty(name)); // build the dummy mapping only when no mapping matched
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java index 886c7240..d5f04f6f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java @@ -19,7 +19,6 @@ import org.kie.api.runtime.KieSession; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; @@ -473,6 +472,20 @@ public class ComponentCreationService { */ public void joiningFromSameTableRow(String ruleIdentifier, String name, Collection entities) { +
joiningFromSameTableRow(ruleIdentifier, name, entities, ", "); + } + + + /** + * Joins values from entities that are in the same table row. They are sorted from left to right. If entities are not in a table cell they are added as a single component. + * + * @param ruleIdentifier the identifier of the rule + * @param name the name of the entity + * @param entities the collection of entities + * @param delimiter the delimiter to separate the values + */ + public void joiningFromSameTableRow(String ruleIdentifier, String name, Collection entities, String delimiter) { + String types = entities.stream() .map(Entity::getType) .sorted(Comparator.reverseOrder()) @@ -497,7 +510,7 @@ public class ComponentCreationService { entitiesInSameRow.stream() .sorted(EntityComparators.first()) .map(Entity::getValue) - .collect(Collectors.joining(", ")), + .collect(Collectors.joining(delimiter)), valueDescription, entitiesInSameRow)); }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java index ec9c93e3..42134e26 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java @@ -3,11 +3,11 @@ package com.iqser.red.service.redaction.v1.server.service.document; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.NoSuchElementException; import com.iqser.red.service.redaction.v1.server.model.document.DocumentData; import 
com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; @@ -86,8 +86,12 @@ public class DocumentGraphMapper { } List treeId = Arrays.stream(entryData.getTreeId()).boxed() .toList(); - entryData.getEngines() - .forEach(engine -> node.addEngine(engine)); + if (entryData.getEngines() != null) { + entryData.getEngines() + .forEach(node::addEngine); + } else { + entryData.setEngines(Collections.emptySet()); + } node.setTreeId(treeId); switch (entryData.getType()) { @@ -172,7 +176,7 @@ public class DocumentGraphMapper { } - private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) { + private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) { return Arrays.stream(atomicTextBlockIds) .map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId)) @@ -197,9 +201,9 @@ public class DocumentGraphMapper { private Page getPage(Long pageIndex, Context context) { - return context.pageData.stream() - .filter(page -> page.getNumber() == Math.toIntExact(pageIndex)) - .findFirst().orElseThrow(() -> new NoSuchElementException(String.format("ClassificationPage with number %d not found", pageIndex))); + Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1); + assert page.getNumber() == Math.toIntExact(pageIndex); + return page; } @@ -214,7 +218,7 @@ public class DocumentGraphMapper { Context(DocumentData documentData, DocumentTree documentTree) { this.documentTree = documentTree; - this.pageData = new LinkedList<>(); + this.pageData = new ArrayList<>(); this.documentTextData = Arrays.stream(documentData.getDocumentTextData()) .toList(); this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index 04d2d408..cd58bbe7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -465,7 +465,6 @@ public class EntityCreationService { public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) { return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) - .stream() .filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary)) .map(bounds -> byTextRange(bounds, type, entityType, node)) .filter(Optional::isPresent) @@ -487,7 +486,6 @@ public class EntityCreationService { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, false); return searchImplementation.getBoundaries(textBlock, node.getTextRange()) - .stream() .map(boundary -> toLineAfterTextRange(textBlock, boundary)) .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) .map(boundary -> byTextRange(boundary, type, entityType, node)) @@ -510,7 +508,6 @@ public class EntityCreationService { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, true); return searchImplementation.getBoundaries(textBlock, node.getTextRange()) - .stream() .map(boundary -> toLineAfterTextRange(textBlock, boundary)) .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) .map(boundary -> byTextRange(boundary, type, entityType, node)) diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index 251016d8..ecf0983e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -186,10 +186,11 @@ public class EntityFindingUtility { node.getPages())); } - SearchImplementation searchImplementation = new SearchImplementation(entryValues.stream().map(String::trim).collect(Collectors.toSet()), true); + SearchImplementation searchImplementation = new SearchImplementation(entryValues.stream() + .map(String::trim) + .collect(Collectors.toSet()), true); return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) - .stream() .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, node, Collections.emptySet())) .filter(Optional::isPresent) .map(Optional::get) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index 5679b745..0fcec3b4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -17,13 +17,17 @@ 
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; +import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingMemoryCache; +import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService; import com.iqser.red.service.redaction.v1.server.service.document.ComponentComparator; import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService; import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException; +import com.knecon.fforesight.tenantcommons.TenantContext; import lombok.AccessLevel; import lombok.RequiredArgsConstructor; @@ -36,15 +40,28 @@ import lombok.extern.slf4j.Slf4j; @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) public class ComponentDroolsExecutionService { + public static final String COMPONENT_MAPPING_SERVICE_GLOBAL = "componentMappingService"; + RedactionServiceSettings settings; + ComponentMappingMemoryCache componentMappingMemoryCache; - public List executeRules(KieContainer kieContainer, EntityLog entityLog, Document document, List fileAttributes) { + public List executeRules(KieContainer kieContainer, + EntityLog entityLog, + Document document, + List fileAttributes, + List componentMappings) { KieSession 
kieSession = kieContainer.newKieSession(); ComponentCreationService componentCreationService = new ComponentCreationService(kieSession); + ComponentMappingService componentMappingService = new ComponentMappingService(componentMappingMemoryCache, componentMappings); kieSession.setGlobal("componentCreationService", componentCreationService); + + if (hasComponentMappingServiceGlobal(kieSession)) { + kieSession.setGlobal(COMPONENT_MAPPING_SERVICE_GLOBAL, componentMappingService); + } + entityLog.getEntityLogEntry() .stream() .filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED)) @@ -54,7 +71,10 @@ public class ComponentDroolsExecutionService { .filter(f -> f.getValue() != null) .forEach(kieSession::insert); + String tenantId = TenantContext.getTenantId(); + CompletableFuture completableFuture = CompletableFuture.supplyAsync(() -> { + TenantContext.setTenantId(tenantId); kieSession.fireAllRules(); return null; }); @@ -82,6 +102,16 @@ public class ComponentDroolsExecutionService { } + private static boolean hasComponentMappingServiceGlobal(KieSession kieSession) { + + return kieSession.getKieBase().getKiePackages() + .stream() + .flatMap(kiePackage -> kiePackage.getGlobalVariables() + .stream()) + .anyMatch(global -> global.getName().equals(COMPONENT_MAPPING_SERVICE_GLOBAL)); + } + + public List getFileAttributes(KieSession kieSession) { List fileAttributes = new LinkedList<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsValidationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsValidationService.java index 751c0fcb..b3509cfe 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsValidationService.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsValidationService.java @@ -44,7 +44,8 @@ public class DroolsValidationService { private final RedactionServiceSettings redactionServiceSettings; private final KieContainerCreationService kieContainerCreationService; private final DeprecatedElementsFinder deprecatedElementsFinder; - private static final Pattern allowedImportsPattern = Pattern.compile("^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red)\\..*;$"); + private static final Pattern allowedImportsPattern = Pattern.compile( + "^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util)\\..*;$"); public static final String LINEBREAK_MATCHER = "\\R"; @@ -72,7 +73,7 @@ public class DroolsValidationService { DroolsValidation customValidation = ruleFileBluePrint.getDroolsValidation(); - addSyntaxDeprecatedWarnings(ruleFileBluePrint, customValidation); + addSyntaxDeprecatedWarnings(ruleFileType, ruleFileBluePrint, customValidation); addSyntaxErrorMessages(ruleFileType, ruleFileBluePrint, customValidation); @@ -84,13 +85,33 @@ public class DroolsValidationService { } - private void addSyntaxDeprecatedWarnings(RuleFileBluePrint ruleFileBluePrint, DroolsValidation customValidation) { + private void addSyntaxDeprecatedWarnings(RuleFileType ruleFileType, RuleFileBluePrint ruleFileBluePrint, DroolsValidation customValidation) { + // find deprecated elements in the ruleFileBluePrint DroolsSyntaxDeprecatedWarnings warningMessageForImports = getWarningsForDeprecatedImports(ruleFileBluePrint); if (warningMessageForImports != null) { customValidation.getDeprecatedWarnings().add(warningMessageForImports); } customValidation.getDeprecatedWarnings().addAll(getWarningsForDeprecatedRules(ruleFileBluePrint)); + + if (ruleFileType.equals(RuleFileType.COMPONENT)) { + if 
(!ruleFileBluePrint.getGlobals().contains(ComponentDroolsExecutionService.COMPONENT_MAPPING_SERVICE_GLOBAL)) { + customValidation.getDeprecatedWarnings().add(buildComponentMappingServiceMissingMessage(ruleFileBluePrint)); + } + + } + } + + + private static DroolsSyntaxDeprecatedWarnings buildComponentMappingServiceMissingMessage(RuleFileBluePrint ruleFileBluePrint) { + + return DroolsSyntaxDeprecatedWarnings.builder() + .message("global ComponentMappingService " + + ComponentDroolsExecutionService.COMPONENT_MAPPING_SERVICE_GLOBAL + + "\n is missing from the rules, consider adding it, as it will be required in future versions!") + .line(ruleFileBluePrint.getGlobalsLine()) + .column(0) + .build(); } @@ -159,12 +180,12 @@ public class DroolsValidationService { .message(String.format("Changing the imports is not allowed! Must be: %n%s", baseRuleFileBluePrint.getImports())) .build()); } - if (!ruleFileBluePrint.getGlobals().equals(baseRuleFileBluePrint.getGlobals())) { + if (!ruleFileBluePrint.getGlobals().contains(baseRuleFileBluePrint.getGlobals())) { customValidation.getSyntaxErrorMessages() .add(DroolsSyntaxErrorMessage.builder() .line(ruleFileBluePrint.getGlobalsLine()) .column(0) - .message(String.format("Changing the globals is not allowed! Must be: %n%s", baseRuleFileBluePrint.getGlobals())) + .message(String.format("Removing the globals is not allowed! 
Must be: %n%s", baseRuleFileBluePrint.getGlobals())) .build()); } baseRuleFileBluePrint.getQueries() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QueryParser.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QueryParser.java new file mode 100644 index 00000000..f5c57792 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QueryParser.java @@ -0,0 +1,61 @@ +package com.iqser.red.service.redaction.v1.server.utils; + +import java.io.Reader; +import java.io.StringReader; +import java.util.List; + +import com.iqser.red.service.redaction.v1.server.model.component.ComponentMappingQueryParameters; +import com.iqser.red.service.redaction.v1.server.model.component.QueryOperator; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; + +import lombok.SneakyThrows; +import lombok.experimental.UtilityClass; + +@UtilityClass +public class QueryParser { + + char QUOTE_CHARACTER = '\''; + char SEPARATOR = ' '; + + + @SneakyThrows + public ComponentMappingQueryParameters parse(String query, List columnLabels) { + + Reader reader = new StringReader(query); + List rawData; + try (CSVReader csvReader = buildReader(reader)) { + rawData = csvReader.readAll(); + + } catch (Exception e) { + return ComponentMappingQueryParameters.invalid(); + } + if (rawData.size() != 1) { + return ComponentMappingQueryParameters.invalid(); + } + String[] tokens = rawData.get(0); + + if (tokens.length != 3) { + return ComponentMappingQueryParameters.invalid(); + } + + String fieldName = tokens[0]; + String operator = tokens[1]; + String value = tokens[2]; + + int idx = columnLabels.indexOf(fieldName); + if (idx < 0) { + return ComponentMappingQueryParameters.invalid(); + } + + return new ComponentMappingQueryParameters(idx, 
QueryOperator.fromString(operator), value); + } + + + private static CSVReader buildReader(Reader reader) { + + return new CSVReaderBuilder(reader).withCSVParser(new CSVParserBuilder().withSeparator(SEPARATOR).withQuoteChar(QUOTE_CHARACTER).build()).build(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java index e79035e5..911d4cdb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java @@ -1,5 +1,9 @@ package com.iqser.red.service.redaction.v1.server; +import static com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer.MONGO_DATABASE; +import static com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer.MONGO_PASSWORD; +import static com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer.MONGO_USERNAME; +import static com.knecon.fforesight.tenantcommons.model.TenantResponse.builder; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; @@ -50,9 +54,14 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVers import com.iqser.red.service.redaction.v1.server.service.AnalyzeService; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer; import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; import com.iqser.red.storage.commons.service.StorageService; -import 
com.knecon.fforesight.tenantcommons.TenantsClient; +import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor; +import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider; +import com.knecon.fforesight.tenantcommons.TenantContext; +import com.knecon.fforesight.tenantcommons.TenantProvider; +import com.knecon.fforesight.tenantcommons.model.MongoDBConnection; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -70,7 +79,7 @@ import lombok.extern.slf4j.Slf4j; * This way you can recreate what is happening on the stack almost exactly. */ public class AnalysisEnd2EndTest { - Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/business-logic/redactmanager/prod-cp-eu-reg/EFSA_sanitisation_GFL_v1"); // Add your dossier-template here + Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/testing dossier templates/Production DocuMine"); // Add your dossier-template here ObjectMapper mapper = ObjectMapperFactory.create(); final String TENANT_ID = "tenant"; @@ -87,11 +96,9 @@ import lombok.extern.slf4j.Slf4j; RabbitTemplate rabbitTemplate; TestDossierTemplate testDossierTemplate; - @MockBean - protected LegalBasisClient legalBasisClient; @MockBean - private TenantsClient tenantsClient; + protected LegalBasisClient legalBasisClient; @MockBean protected RulesClient rulesClient; @@ -99,12 +106,21 @@ import lombok.extern.slf4j.Slf4j; @MockBean protected DictionaryClient dictionaryClient; + @MockBean + private MongoConnectionProvider mongoConnectionProvider; + + @MockBean + private TenantProvider tenantProvider; + + @Autowired + protected TenantMongoLiquibaseExecutor tenantMongoLiquibaseExecutor; + @Test @SneakyThrows public void runAnalysisEnd2End() { - String folder = "files/end2end/file0"; // Should contain all files from minio directly, still zipped. Can contain multiple files. 
+ String folder = "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/end2end/broken"; // Should contain all files from minio directly, still zipped. Can contain multiple files. Path absoluteFolderPath; if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path @@ -128,6 +144,25 @@ import lombok.extern.slf4j.Slf4j; @BeforeEach public void setup() { + TenantContext.setTenantId(TENANT_ID); + + var mongoInstance = MongoDBTestContainer.getInstance(); + mongoInstance.start(); + + MongoDBConnection mongoDBConnection = MongoDBConnection.builder() + .prefix("mongodb") + .username(MONGO_USERNAME) + .password(MONGO_PASSWORD) + .address(mongoInstance.getHost() + ":" + mongoInstance.getFirstMappedPort()) + .database(MONGO_DATABASE) + .options("") + .build(); + + when(mongoConnectionProvider.getMongoDBConnection(any())).thenReturn(mongoDBConnection); + when(tenantProvider.getTenant(any())).thenReturn(builder().tenantId(TENANT_ID).mongoDBConnection(mongoDBConnection).build()); + + tenantMongoLiquibaseExecutor.initializeTenant("redaction"); + testDossierTemplate = new TestDossierTemplate(dossierTemplateToUse); when(dictionaryService.updateDictionary(any(), any())).thenReturn(new DictionaryVersion(0, 0)); when(dictionaryService.getDeepCopyDictionary(any(), any())).thenReturn(testDossierTemplate.testDictionary); @@ -240,10 +275,12 @@ import lombok.extern.slf4j.Slf4j; Map dossierTemplate = mapper.readValue(dossierTemplateToUse.resolve("dossierTemplate.json").toFile(), HashMap.class); this.id = (String) dossierTemplate.get("dossierTemplateId"); + List dictionaries = Files.walk(dossierTemplateToUse) .filter(path -> path.getFileName().toString().equals("dossierType.json")) .map(this::loadDictionaryModel) .toList(); + File ruleFile = dossierTemplateToUse.resolve("rules.drl").toFile(); rules = new 
String(Files.readAllBytes(ruleFile.toPath())); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index c32e7c8f..b6127b05 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -61,7 +61,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { // @Disabled public void titleExtraction() throws IOException { - AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/402Study-ocred.pdf"); ClassPathResource importedRedactionClasspathResource = new ClassPathResource( "files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java index 61c0d89d..8692f648 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java @@ -314,7 +314,6 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration TextBlock 
textBlock = document.getTextBlock(); searchImplementation.getBoundaries(textBlock, textBlock.getTextRange()) - .stream() .filter(boundary -> boundaryIsSurroundedBySeparators(textBlock, boundary)) .map(bounds -> TextEntity.initialEntityNode(bounds, type, entityType, document)) .forEach(foundEntities::add); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java index 685ddaae..6a6a4330 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -5,14 +5,12 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.awt.Color; import java.io.File; -import java.io.FileOutputStream; import java.nio.file.Path; import java.util.List; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; -import org.drools.io.ClassPathResource; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/components/mappings/MappingCachesTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/components/mappings/MappingCachesTest.java new file mode 100644 index 00000000..ed643670 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/components/mappings/MappingCachesTest.java @@ -0,0 +1,248 @@ +package 
com.iqser.red.service.redaction.v1.server.service.components.mappings;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.springframework.core.io.ClassPathResource;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.commons.jackson.ObjectMapperFactory;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.service.redaction.v1.server.model.component.ComponentMapping;
+import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingFileSystemCache;
+import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingMemoryCache;
+import com.iqser.red.storage.commons.service.StorageService;
+import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
+import com.knecon.fforesight.tenantcommons.TenantContext;
+
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+/** Exercises ComponentMappingFileSystemCache and ComponentMappingMemoryCache on top of a FileSystemBackedStorageService. */
+@Slf4j
+public class MappingCachesTest {
+
+    ObjectMapper mapper = ObjectMapperFactory.create();
+    StorageService storageService = new FileSystemBackedStorageService(mapper);
+    ComponentMappingFileSystemCache componentMappingFileSystemCache = new ComponentMappingFileSystemCache(storageService, mapper);
+    ComponentMappingMemoryCache componentMappingMemoryCache = new ComponentMappingMemoryCache(componentMappingFileSystemCache);
+
+    String dossierTemplateId = "2c4cfafc-b3c3-48de-8706-a979dea764aa";
+
+    /** The first lookup must add exactly one cached file with the stored content; later lookups must be faster and add no entries. */
+    @Test
+    @SneakyThrows
+    public void testFileSystemCache() {
+
+        TenantContext.setTenantId("tenant-id");
+        File guidelineMapping = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata metadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+
+        try (var in = new FileInputStream(guidelineMapping)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadata.getStorageId(), in);
+        }
+
+        assertTrue(componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).isEmpty());
+        // nanoTime() is monotonic and fine-grained; with currentTimeMillis() both lookups can measure 0 ms and the comparison below fails spuriously
+        long start = System.nanoTime();
+        File mappingFile = componentMappingFileSystemCache.getComponentMappingFile(metadata);
+        long initial = System.nanoTime() - start;
+
+        assertTrue(FileUtils.contentEquals(guidelineMapping, mappingFile));
+        assertEquals(1, componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).size());
+
+        start = System.nanoTime();
+        componentMappingFileSystemCache.getComponentMappingFile(metadata);
+        long cachedTime = System.nanoTime() - start;
+
+        assertTrue(initial > cachedTime);
+
+        componentMappingFileSystemCache.getComponentMappingFile(metadata);
+        componentMappingFileSystemCache.getComponentMappingFile(metadata);
+        componentMappingFileSystemCache.getComponentMappingFile(metadata);
+
+        assertEquals(1, componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).size());
+        assertEquals(mappingFile,
+                     componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId)
+                             .get(metadata).toFile());
+    }
+
+    /** A mapping cached while one tenant id is set must not be listed once TenantContext holds another tenant id. */
+    @Test
+    @SneakyThrows
+    public void testFileSystemCacheTenantSeparation() {
+
+        TenantContext.setTenantId("tenant-id");
+        File guidelineMapping = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata metadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+
+        try (var in = new FileInputStream(guidelineMapping)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadata.getStorageId(), in);
+        }
+
+        componentMappingFileSystemCache.getComponentMappingFile(metadata);
+
+        assertEquals(1, componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).size());
+
+        TenantContext.setTenantId("other-tenant");
+
+        assertTrue(componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).isEmpty());
+    }
+
+    /** Requesting a higher version of the same mapping must keep one cache entry at the same path, now holding the newer CSV content. */
+    @Test
+    @SneakyThrows
+    public void testFileSystemCacheEvict() {
+
+        TenantContext.setTenantId("tenant-id");
+        File guidelineMapping = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata metadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+
+        try (var in = new FileInputStream(guidelineMapping)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadata.getStorageId(), in);
+        }
+
+        File mappingFile = componentMappingFileSystemCache.getComponentMappingFile(metadata);
+
+        assertTrue(FileUtils.contentEquals(guidelineMapping, mappingFile));
+        assertEquals(1, componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).size());
+
+        File guidelineMapping2 = new ClassPathResource("mappings/GuidelineMapping2.csv").getFile();
+        ComponentMappingMetadata metadataVersion1 = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping2.csv.metadata.json").getFile(),
+                                                                     ComponentMappingMetadata.class);
+
+        assertTrue(metadataVersion1.getVersion() > metadata.getVersion());
+
+        try (var in = new FileInputStream(guidelineMapping2)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadataVersion1.getStorageId(), in);
+        }
+
+        File mappingFile2 = componentMappingFileSystemCache.getComponentMappingFile(metadataVersion1);
+
+        assertEquals(1, componentMappingFileSystemCache.getAllCachedMappings(dossierTemplateId).size());
+
+        assertEquals(mappingFile2, mappingFile);
+        assertTrue(FileUtils.contentEquals(mappingFile, mappingFile2));
+        assertTrue(FileUtils.contentEquals(guidelineMapping2, mappingFile2));
+        assertFalse(FileUtils.contentEquals(guidelineMapping, mappingFile2));
+
+    }
+
+    /** The memory cache must return an equal mapping on the second request and serve it faster than the initial load. */
+    @Test
+    @SneakyThrows
+    public void testMemoryCache() {
+
+        TenantContext.setTenantId("tenant-id");
+        File guidelineMapping = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata metadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+
+        try (var in = new FileInputStream(guidelineMapping)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadata.getStorageId(), in);
+        }
+        // nanoTime() keeps the two measurements distinguishable even when the cached lookup takes less than a millisecond
+        long start = System.nanoTime();
+        ComponentMapping mapping = componentMappingMemoryCache.getComponentMapping(metadata);
+        long initialRetrieval = System.nanoTime() - start;
+        assertEquals(30, mapping.getMetaData().getNumberOfLines());
+
+        start = System.nanoTime();
+        ComponentMapping mapping2 = componentMappingMemoryCache.getComponentMapping(metadata);
+        long subsequentRetrieval = System.nanoTime() - start;
+
+        assertEquals(mapping, mapping2);
+        assertTrue(subsequentRetrieval < initialRetrieval);
+    }
+
+    /** Requesting version 1 after version 0 was cached must yield a different mapping that reports 28 lines and version 1. */
+    @Test
+    @SneakyThrows
+    public void testMemoryCacheVersionEvict() {
+
+        TenantContext.setTenantId("tenant-id");
+        File guidelineMapping = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata metadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+
+        try (var in = new FileInputStream(guidelineMapping)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadata.getStorageId(), in);
+        }
+
+        componentMappingMemoryCache.getComponentMapping(metadata); // first request for version 0; the result is fetched again below
+
+        File guidelineMapping2 = new ClassPathResource("mappings/GuidelineMapping2.csv").getFile();
+        ComponentMappingMetadata metadataVersion1 = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping2.csv.metadata.json").getFile(),
+                                                                     ComponentMappingMetadata.class);
+
+        assertTrue(metadataVersion1.getVersion() > metadata.getVersion());
+
+        ComponentMapping componentMapping = componentMappingMemoryCache.getComponentMapping(metadata);
+
+        try (var in = new FileInputStream(guidelineMapping2)) {
+            storageService.storeObject(TenantContext.getTenantId(), metadataVersion1.getStorageId(), in);
+        }
+
+        ComponentMapping componentMapping2 = componentMappingMemoryCache.getComponentMapping(metadataVersion1);
+
+        Assertions.assertNotEquals(componentMapping, componentMapping2);
+
+        assertEquals(28, componentMapping2.getMetaData().getNumberOfLines());
+        assertEquals(1, componentMapping2.getMetaData().getVersion());
+    }
+
+    /** Requests 100,000 distinct mappings and asserts that 98% of the peak cache size does not exceed MAX_NUMBER_OF_LINES / 30. */
+    @Test
+    @SneakyThrows
+    public void testMemoryCacheSizeEvict() {
+
+        int numberOfMappings = 100_000;
+        TenantContext.setTenantId("tenant-id");
+        File guidelineMapping = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata originMetadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+        for (int i = 0; i < numberOfMappings; i++) {
+            try (var in = new FileInputStream(guidelineMapping)) {
+                storageService.storeObject(TenantContext.getTenantId(), originMetadata.getStorageId() + i, in);
+            }
+        }
+        log.info("Stored all files, populating caches");
+        long maxCacheSize = 0;
+        for (int i = 0; i < numberOfMappings; i++) {
+            ComponentMappingMetadata metadata = ComponentMappingMetadata.builder()
+                    .id(originMetadata.getId() + i)
+                    .name(originMetadata.getName() + i)
+                    .storageId(originMetadata.getStorageId() + i)
+                    .fileName(originMetadata.getFileName())
+                    .columnLabels(originMetadata.getColumnLabels())
+                    .encoding(originMetadata.getEncoding())
+                    .version(0)
+                    .delimiter(originMetadata.getDelimiter())
+                    .numberOfLines(originMetadata.getNumberOfLines())
+                    .build();
+            componentMappingMemoryCache.getComponentMapping(metadata); // requested three times; only the resulting cache size is inspected
+            componentMappingMemoryCache.getComponentMapping(metadata);
+            componentMappingMemoryCache.getComponentMapping(metadata);
+            long cacheSize = componentMappingMemoryCache.numberOfCachedMappings();
+            if (cacheSize > maxCacheSize) {
+                maxCacheSize = cacheSize;
+            }
+        }
+        log.info("Max cache size: {}", maxCacheSize);
+        assertTrue(maxCacheSize * 0.98
+                           <= (double) ComponentMappingMemoryCache.MAX_NUMBER_OF_LINES / 30); // cache might grow slightly over its budget, due to lazy garbage collection
+    }
+
+    /** Clears the backing storage and the file-system cache after every test. */
+    @AfterEach
+    public void cleanUpStorage() {
+
+        ((FileSystemBackedStorageService) storageService).clearStorage();
+        componentMappingFileSystemCache.cleanUp();
+    }
+
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/components/mappings/MappingQueryTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/components/mappings/MappingQueryTest.java new file mode 100644 index 00000000..82d446f9 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/components/mappings/MappingQueryTest.java @@ -0,0 +1,125 @@ +package com.iqser.red.service.redaction.v1.server.service.components.mappings; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +import org.junit.jupiter.api.Test; +import org.springframework.core.io.ClassPathResource; + +import com.fasterxml.jackson.databind.ObjectMapper; +import 
com.iqser.red.commons.jackson.ObjectMapperFactory;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
+import com.iqser.red.service.redaction.v1.server.model.component.ComponentMapping;
+import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingMemoryCache;
+
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+/** Tests the where(...)/select(...) query chain and the existsByFirstColumn lookup of ComponentMapping. */
+@Slf4j
+public class MappingQueryTest {
+
+    ObjectMapper mapper = ObjectMapperFactory.create();
+
+    /** Loads GuidelineMapping.csv and checks "=" and "contains" conditions, with quoted and unquoted values. */
+    @Test
+    @SneakyThrows
+    public void testMappingQuery() {
+
+        File mappingFile = new ClassPathResource("mappings/GuidelineMapping.csv").getFile();
+        ComponentMappingMetadata metadata = mapper.readValue(new ClassPathResource("mappings/GuidelineMapping.csv.metadata.json").getFile(), ComponentMappingMetadata.class);
+        ComponentMapping mapping = ComponentMappingMemoryCache.loadMappingFromFile(mappingFile, metadata);
+
+        Optional<String> queryResult = mapping.where("number = 425").where("year = 2008").select("description")
+                .findFirst();
+        assertTrue(queryResult.isPresent());
+        assertEquals("Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)", queryResult.get());
+
+        List<String> queryResultList = mapping.where("number = 425").where("year = 2008").select("description")
+                .toList();
+        assertEquals(1, queryResultList.size());
+
+        queryResultList = mapping.where("number = '425'").where("year contains 2").select("description")
+                .toList();
+        assertEquals(2, queryResultList.size());
+
+        queryResultList = mapping.where("number = '425'").where("year contains 3").select("description")
+                .toList();
+        assertEquals(0, queryResultList.size());
+
+        queryResultList = mapping.where("description contains 'Nº 425: Acute oral Toxicity'").select("description")
+                .toList();
+
+        assertEquals(2, queryResultList.size());
+    }
+
+    /** Builds a 1,000,000-row mapping in memory and checks existsByFirstColumn stays under 1 ms on median and mean, and under 100 ms at most. */
+    @Test
+    @SneakyThrows
+    public void testMappingQueryPerformance() {
+
+        int csvSize = 1000000;
+
+        List<String[]> data = new ArrayList<>(csvSize);
+        for (int i = 0; i < csvSize; i++) {
+            data.add(new String[]{String.valueOf(i),//
+                    String.valueOf(Math.random() * csvSize),//
+                    String.valueOf(Math.random() * csvSize),//
+                    String.valueOf(Math.random() * csvSize)});
+        }
+        // Rows are ordered by the first column as strings (lexicographic, not numeric); presumably the order existsByFirstColumn relies on - TODO confirm
+        data.sort(Comparator.comparing(s -> s[0]));
+
+        ComponentMappingMetadata metadata = ComponentMappingMetadata.builder().columnLabels(List.of("0", "1", "2", "3")).build();
+        ComponentMapping mapping = ComponentMapping.builder().metadata(metadata).data(data).build();
+        long[] durations = new long[csvSize];
+        long timestamp;
+        for (int i = 0; i < csvSize; i++) {
+            timestamp = System.currentTimeMillis();
+            String query = String.valueOf((int) (Math.random() * (csvSize - 1)));
+            assertTrue(mapping.existsByFirstColumn(query)); // JUnit assertion always runs; the `assert` keyword is skipped when JVM assertions are disabled
+            durations[i] = System.currentTimeMillis() - timestamp;
+        }
+        assertFalse(mapping.existsByFirstColumn(String.valueOf(csvSize + 1)));
+
+        // Compute mean, median, max, min
+        long sum = 0;
+        long max = Long.MIN_VALUE;
+        long min = Long.MAX_VALUE;
+
+        for (long duration : durations) {
+            sum += duration;
+            if (duration > max) {
+                max = duration;
+            }
+            if (duration < min) {
+                min = duration;
+            }
+        }
+
+        double mean = (double) sum / durations.length;
+
+        // Sort array for median calculation
+        Arrays.sort(durations);
+        double median;
+        int n = durations.length;
+        if (n % 2 == 0) {
+            median = (durations[n / 2 - 1] + durations[n / 2]) / 2.0;
+        } else {
+            median = durations[n / 2];
+        }
+        assertTrue(median < 1);
+        assertTrue(mean < 1);
+        assertTrue(max < 100);
+
+        log.info(String.format("Exists check times in ms: \n total: %d\n mean: %.2f\n median: %.2f\n max: %d\n min: %d", sum, mean, median, max, min));
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/QueryParserTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/QueryParserTest.java new file mode 100644 index 00000000..a182d4e8 --- /dev/null +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/QueryParserTest.java
@@ -0,0 +1,64 @@
+package com.iqser.red.service.redaction.v1.server.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import com.iqser.red.service.redaction.v1.server.model.component.ComponentMappingQueryParameters;
+import com.iqser.red.service.redaction.v1.server.model.component.QueryOperator;
+/** Unit test for QueryParser.parse(query, columnLabels). */
+class QueryParserTest {
+    /** Covers "=", "equals", "contains", "matches", the two INVALID inputs (no space before the operator, unterminated quote), quoted values and an escaped quote. */
+    @Test
+    public void testQueryTokenization() {
+
+        List<String> columnLabels = List.of("name", "version", "year", "description");
+        String query = "name = peter";
+        ComponentMappingQueryParameters parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(0, parameters.columnIdx());
+        assertEquals(QueryOperator.EQUALS, parameters.operator());
+        assertEquals("peter", parameters.query());
+
+        query = "name equals peter";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(0, parameters.columnIdx());
+        assertEquals(QueryOperator.EQUALS, parameters.operator());
+        assertEquals("peter", parameters.query());
+
+        query = "name contains peter";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(0, parameters.columnIdx());
+        assertEquals(QueryOperator.CONTAINS, parameters.operator());
+        assertEquals("peter", parameters.query());
+
+        query = "name matches peter";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(0, parameters.columnIdx());
+        assertEquals(QueryOperator.MATCHES, parameters.operator());
+        assertEquals("peter", parameters.query());
+
+        query = "namematches peter";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(QueryOperator.INVALID, parameters.operator());
+
+        query = "name matches 'peter";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(QueryOperator.INVALID, parameters.operator());
+
+        query = "name = 'peter der große'";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(0, parameters.columnIdx());
+        assertEquals(QueryOperator.EQUALS, parameters.operator());
+        assertEquals("peter der große", parameters.query());
+
+        query = "name = peter\\'s";
+        parameters = QueryParser.parse(query, columnLabels);
+        assertEquals(0, parameters.columnIdx());
+        assertEquals(QueryOperator.EQUALS, parameters.operator());
+        assertEquals("peter's", parameters.query());
+
+    }
+
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index 5f1653e0..dea2834f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -14,6 +14,7 @@ import java.util.Optional; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; +import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService; import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change; @@ -26,6 +27,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; global ComponentCreationService componentCreationService +global ComponentMappingService componentMappingService //------------------------------------ queries ------------------------------------ @@ -37,14 +39,6 @@ query "getComponents" $component: Component() end -//------------------------------------ Guideline mapping object ------------------------------------ 
- -declare GuidelineMapping - number: String - year: String - guideline: String - end - //------------------------------------ Default Components rules ------------------------------------ rule "StudyTitle.0.0: First Title found" @@ -103,56 +97,21 @@ rule "GLPStudy.1.0: GLP Study not found" componentCreationService.create("GLPStudy.1.0", "GLP_Study", "No", "Yes if present, No if not"); end - -rule "TestGuideline.0.0: create OECD number and year guideline mappings" - salience 2 - when - Entity(type == "oecd_guideline_number") - Entity(type == "oecd_guideline_year") - then - insert(new GuidelineMapping("425", "2008", "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)")); - insert(new GuidelineMapping("425", "2001", "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (17/12/2001)")); - insert(new GuidelineMapping("402", "2017", "Nº 402: Acute Dermal Toxicity (09/10/2017)")); - insert(new GuidelineMapping("402", "1987", "Nº 402: Acute Dermal Toxicity (24/02/1987)")); - insert(new GuidelineMapping("403", "2009", "Nº 403: Acute Inhalation Toxicity (08/09/2009)")); - insert(new GuidelineMapping("403", "1981", "Nº 403: Acute Inhalation Toxicity (12/05/1981)")); - insert(new GuidelineMapping("433", "2018", "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (27/06/2018)")); - insert(new GuidelineMapping("433", "2017", "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (09/10/2017)")); - insert(new GuidelineMapping("436", "2009", "Nº 436: Acute Inhalation Toxicity – Acute Toxic Class Method (08/09/2009)")); - insert(new GuidelineMapping("404", "1981", "Nº 404: Acute Dermal Irritation/Corrosion (12/05/1981)")); - insert(new GuidelineMapping("404", "1992", "Nº 404: Acute Dermal Irritation/Corrosion (17/07/1992)")); - insert(new GuidelineMapping("404", "2002", "Nº 404: Acute Dermal Irritation/Corrosion (24/04/2002)")); - insert(new GuidelineMapping("404", "2015", "Nº 404: Acute Dermal Irritation/Corrosion (28/07/2015)")); - 
insert(new GuidelineMapping("405", "2017", "Nº 405: Acute Eye Irritation/Corrosion (09/10/2017)")); - insert(new GuidelineMapping("405", "2012", "Nº 405: Acute Eye Irritation/Corrosion (02/10/2012)")); - insert(new GuidelineMapping("405", "2002", "Nº 405: Acute Eye Irritation/Corrosion (24/04/2002)")); - insert(new GuidelineMapping("405", "1987", "Nº 405: Acute Eye Irritation/Corrosion (24/02/1987)")); - insert(new GuidelineMapping("429", "2002", "Nº 429: Skin Sensitisation: Local Lymph Node Assay (24/04/2002)")); - insert(new GuidelineMapping("429", "2010", "Nº 429: Skin Sensitisation (23/07/2010)")); - insert(new GuidelineMapping("442A", "2018", "Nº 442A: Skin Sensitization (23/07/2018)")); - insert(new GuidelineMapping("442B", "2018", "Nº 442B: Skin Sensitization (27/06/2018)")); - insert(new GuidelineMapping("471", "1997", "Nº 471: Bacterial Reverse Mutation Test (21/07/1997)")); - insert(new GuidelineMapping("471", "2020", "Nº 471: Bacterial Reverse Mutation Test (26/06/2020)")); - insert(new GuidelineMapping("406", "1992", "Nº 406: Skin Sensitisation (1992)")); - insert(new GuidelineMapping("428", "2004", "Nº 428: Split-Thickness Skin test (2004)")); - insert(new GuidelineMapping("438", "2018", "Nº 438: Eye Irritation (26/06/2018)")); - insert(new GuidelineMapping("439", "2019", "Nº 439: Skin Irritation (2019)")); - insert(new GuidelineMapping("474", "2016", "Nº 474: Micronucleus Bone Marrow Cells Rat (2016)")); - insert(new GuidelineMapping("487", "2016", "Nº 487: Micronucleus Human Lymphocytes (2016)")); - end - rule "TestGuideline.0.1: match OECD number and year with guideline mappings" salience 1 when not Component(name == "Test_Guidelines_1") - GuidelineMapping($year: year, $number: number, $guideline: guideline) - $guidelineNumber: Entity(type == "oecd_guideline_number", value == $number) - $guidelineYear: Entity(type == "oecd_guideline_year", value == $year) + $guidelineNumber: Entity(type == "oecd_guideline_number", $number: value) + $guidelineYear: 
Entity(type == "oecd_guideline_year", $year: value) then + Optional guidelineMatch = componentMappingService.from("GuidelineMapping").where("number = " + $number).where("year = " + $year).select("description").findAny(); + if (guidelineMatch.isEmpty()) { + return; + } componentCreationService.create( "TestGuideline.0.0", "Test_Guidelines_1", - $guideline, + guidelineMatch.get(), "OECD Number and guideline year mapped!", List.of($guidelineNumber, $guidelineYear) ); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping.csv b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping.csv new file mode 100644 index 00000000..c1160540 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping.csv @@ -0,0 +1,30 @@ +number,year,description +402,1987,"Nº 402: Acute Dermal Toxicity (24/02/1987)" +402,2017,"Nº 402: Acute Dermal Toxicity (09/10/2017)" +403,1981,"Nº 403: Acute Inhalation Toxicity (12/05/1981)" +403,2009,"Nº 403: Acute Inhalation Toxicity (08/09/2009)" +404,1981,"Nº 404: Acute Dermal Irritation/Corrosion (12/05/1981)" +404,1992,"Nº 404: Acute Dermal Irritation/Corrosion (17/07/1992)" +404,2002,"Nº 404: Acute Dermal Irritation/Corrosion (24/04/2002)" +404,2015,"Nº 404: Acute Dermal Irritation/Corrosion (28/07/2015)" +405,1987,"Nº 405: Acute Eye Irritation/Corrosion (24/02/1987)" +405,2002,"Nº 405: Acute Eye Irritation/Corrosion (24/04/2002)" +405,2012,"Nº 405: Acute Eye Irritation/Corrosion (02/10/2012)" +405,2017,"Nº 405: Acute Eye Irritation/Corrosion (09/10/2017)" +406,1992,"Nº 406: Skin Sensitisation (1992)" +425,2001,"Nº 425: Acute oral Toxicity - Up-and-Down Procedure (17/12/2001)" +425,2008,"Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)" +428,2004,"Nº 428: Split-Thickness Skin test (2004)" +429,2002,"Nº 429: Skin Sensitisation: Local Lymph Node Assay (24/04/2002)" +429,2010,"Nº 429: Skin 
Sensitisation (23/07/2010)" +433,2017,"Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (09/10/2017)" +433,2018,"Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (27/06/2018)" +436,2009,"Nº 436: Acute Inhalation Toxicity – Acute Toxic Class Method (08/09/2009)" +438,2018,"Nº 438: Eye Irritation (26/06/2018)" +439,2019,"Nº 439: Skin Irritation (2019)" +442A,2018,"Nº 442A: Skin Sensitization (23/07/2018)" +442B,2018,"Nº 442B: Skin Sensitization (27/06/2018)" +471,1997,"Nº 471: Bacterial Reverse Mutation Test (21/07/1997)" +471,2020,"Nº 471: Bacterial Reverse Mutation Test (26/06/2020)" +474,2016,"Nº 474: Micronucleus Bone Marrow Cells Rat (2016)" +487,2016,"Nº 487: Micronucleus Human Lymphocytes (2016)" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping.csv.metadata.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping.csv.metadata.json new file mode 100644 index 00000000..8640b52b --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping.csv.metadata.json @@ -0,0 +1,15 @@ +{ + "id": "f48e8ab0-250b-4a73-aa4d-436a7176841f", + "name": "GuidelineMapping", + "fileName": "GuidelineMapping.csv", + "version": 0, + "columnLabels": [ + "number", + "year", + "description" + ], + "storageId": "2c4cfafc-b3c3-48de-8706-a979dea764aa/f48e8ab0-250b-4a73-aa4d-436a7176841f_GuidelineMapping_GuidelineMapping.csv", + "numberOfLines": 30, + "encoding": "UTF-8", + "delimiter": "," +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping2.csv b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping2.csv new file mode 100644 index 00000000..494ad1a3 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping2.csv @@ -0,0 +1,28 @@ 
+"number","year","description" +"402","1987","Nº 402: Acute Dermal Toxicity (24/02/1987)" +"402","2017","Nº 402: Acute Dermal Toxicity (09/10/2017)" +"403","1981","Nº 403: Acute Inhalation Toxicity (12/05/1981)" +"403","2009","Nº 403: Acute Inhalation Toxicity (08/09/2009)" +"404","1981","Nº 404: Acute Dermal Irritation/Corrosion (12/05/1981)" +"404","1992","Nº 404: Acute Dermal Irritation/Corrosion (17/07/1992)" +"404","2002","Nº 404: Acute Dermal Irritation/Corrosion (24/04/2002)" +"404","2015","Nº 404: Acute Dermal Irritation/Corrosion (28/07/2015)" +"405","1987","Nº 405: Acute Eye Irritation/Corrosion (24/02/1987)" +"405","2002","Nº 405: Acute Eye Irritation/Corrosion (24/04/2002)" +"405","2012","Nº 405: Acute Eye Irritation/Corrosion (02/10/2012)" +"405","2017","Nº 405: Acute Eye Irritation/Corrosion (09/10/2017)" +"406","1992","Nº 406: Skin Sensitisation (1992)" +"425","2001","Nº 425: Acute oral Toxicity - Up-and-Down Procedure (17/12/2001)" +"425","2008","Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)" +"428","2004","Nº 428: Split-Thickness Skin test (2004)" +"429","2002","Nº 429: Skin Sensitisation: Local Lymph Node Assay (24/04/2002)" +"429","2010","Nº 429: Skin Sensitisation (23/07/2010)" +"433","2017","Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (09/10/2017)" +"433","2018","Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (27/06/2018)" +"436","2009","Nº 436: Acute Inhalation Toxicity – Acute Toxic Class Method (08/09/2009)" +"438","2018","Nº 438: Eye Irritation (26/06/2018)" +"439","2019","Nº 439: Skin Irritation (2019)" +"442A","2018","Nº 442A: Skin Sensitization (23/07/2018)" +"442B","2018","Nº 442B: Skin Sensitization (27/06/2018)" +"471","1997","Nº 471: Bacterial Reverse Mutation Test (21/07/1997)" +"471","2020","Nº 471: Bacterial Reverse Mutation Test (26/06/2020)" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping2.csv.metadata.json 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping2.csv.metadata.json new file mode 100644 index 00000000..9d2fb132 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/mappings/GuidelineMapping2.csv.metadata.json @@ -0,0 +1,15 @@ +{ + "id": "f48e8ab0-250b-4a73-aa4d-436a7176841f", + "name": "GuidelineMapping", + "fileName": "GuidelineMapping.csv", + "version": 1, + "columnLabels": [ + "number", + "year", + "description" + ], + "storageId": "2c4cfafc-b3c3-48de-8706-a979dea764aa/f48e8ab0-250b-4a73-aa4d-436a7176841f_GuidelineMapping_GuidelineMapping.csv", + "numberOfLines": 28, + "encoding": "UTF-8", + "delimiter": "," +} \ No newline at end of file