RED-9933: DocuMine DateFormat config in dossier templates

This commit is contained in:
Maverick Studer 2024-09-24 11:37:50 +02:00
parent 1f3cf8d529
commit f37e49e8bb
17 changed files with 484 additions and 110 deletions

View File

@ -4,7 +4,7 @@ plugins {
}
description = "redaction-service-api-v1"
val persistenceServiceVersion = "2.570.0-RED9348.0"
val persistenceServiceVersion = "2.572.0"
dependencies {
implementation("org.springframework:spring-web:6.0.12")

View File

@ -16,7 +16,7 @@ val layoutParserVersion = "0.174.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.570.0-RED9348.0"
val persistenceServiceVersion = "2.572.0"
val llmServiceVersion = "1.11.0"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.redaction.v1.server.client;
import org.springframework.cloud.openfeign.FeignClient;
import com.iqser.red.service.persistence.service.v1.api.internal.resources.DateFormatsResource;
/**
 * Feign client for the persistence service's date-formats endpoints.
 * All resource methods are inherited from {@link DateFormatsResource};
 * the base URL is resolved from the {@code persistence-service.url} property.
 */
@FeignClient(name = "DateFormatsResource", url = "${persistence-service.url}")
public interface DateFormatsClient extends DateFormatsResource {
}

View File

@ -17,7 +17,20 @@ public final class Context {
private String dossierTemplateId;
@Setter
private long ruleVersion;
@Setter
private long dateFormatsVersion;
private int analysisNumber;
private String tenantId;
/**
 * Creates an analysis context for one file of a dossier.
 * NOTE(review): the new {@code dateFormatsVersion} field is intentionally not
 * set here — it is populated later through its setter once the date-format
 * cache version is known; confirm callers rely on the default of 0 until then.
 */
public Context(String fileId, String dossierId, String dossierTemplateId, long ruleVersion, int analysisNumber, String tenantId) {
this.fileId = fileId;
this.dossierId = dossierId;
this.dossierTemplateId = dossierTemplateId;
this.ruleVersion = ruleVersion;
this.analysisNumber = analysisNumber;
this.tenantId = tenantId;
}
}

View File

@ -101,6 +101,19 @@ public class RedactionMessageReceiver {
format("%.2f", result.getDuration() / 1000.0));
log.info("----------------------------------------------------------------------------------");
break;
case REANALYSE_COMPONENTS_ONLY:
log.info("------------------------------Components Only Reanalysis------------------------------------------");
log.info("Starting Components Only Reanalysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.debug(analyzeRequest.getManualRedactions().toString());
result = analyzeService.reanalyzeComponentsOnly(analyzeRequest);
log.info("Successfully reanalyzed the components for dossier {} file {} took: {} s",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
format("%.2f", result.getDuration() / 1000.0));
log.info("----------------------------------------------------------------------------------");
break;
case SURROUNDING_TEXT_ANALYSIS:
log.info("------------------------------Surrounding Text Analysis------------------------------------------");
log.info("Starting Surrounding Text Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
@ -110,6 +123,7 @@ public class RedactionMessageReceiver {
log.info("-------------------------------------------------------------------------------------------------");
shouldRespond = false;
break;
case IMPORTED_REDACTIONS_ONLY:
log.info("------------------------------Imported Redactions Analysis Only------------------------------------------");
log.info("Starting Imported Redactions Analysis Only for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
@ -118,14 +132,19 @@ public class RedactionMessageReceiver {
log.info("Successful Imported Redactions Analysis Only dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("-------------------------------------------------------------------------------------------------");
break;
case SEARCH_BULK_LOCAL_TERM:
log.info("------------------------------Search Term occurrences for bulk local add ------------------------------------------");
log.info("Starting term search for {} for file {} in dossier {}", analyzeRequest.getBulkLocalRequest().getSearchTerm(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.info("Starting term search for {} for file {} in dossier {}",
analyzeRequest.getBulkLocalRequest().getSearchTerm(),
analyzeRequest.getFileId(),
analyzeRequest.getDossierId());
documentSearchService.searchTermOccurrences(analyzeRequest);
log.info("Successfully located all term occurrences dossier {} file {} ", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("-------------------------------------------------------------------------------------------------");
shouldRespond = false;
break;
default:
throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
}

View File

@ -8,6 +8,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Qualifier;
@ -121,8 +123,21 @@ public class AnalysisPreparationService {
@SneakyThrows
public ReanalysisSetupData getReanalysisSetupData(AnalyzeRequest analyzeRequest) {
CompletableFuture<EntityLog> entityLogFuture = CompletableFuture.supplyAsync(() -> getEntityLog(analyzeRequest), taskExecutor);
return getReanalysisSetupData(analyzeRequest, () -> getEntityLogWithoutEntries(analyzeRequest));
}
/**
 * Builds the reanalysis setup data for a components-only reanalysis.
 * Unlike the regular reanalysis path (which loads the entity log without
 * entries), this supplies the full entity log, since component rule
 * execution needs the log entries.
 */
@SneakyThrows
public ReanalysisSetupData getReanalysisSetupDataForComponentsOnlyReanalyze(AnalyzeRequest analyzeRequest) {
return getReanalysisSetupData(analyzeRequest, () -> getEntityLog(analyzeRequest));
}
@SneakyThrows
private ReanalysisSetupData getReanalysisSetupData(AnalyzeRequest analyzeRequest, Supplier<EntityLog> entityLogSupplier) {
CompletableFuture<EntityLog> entityLogFuture = CompletableFuture.supplyAsync(entityLogSupplier, taskExecutor);
CompletableFuture<Document> documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor);
CompletableFuture.allOf(entityLogFuture, documentFuture).join();
@ -281,14 +296,22 @@ public class AnalysisPreparationService {
}
private EntityLog getEntityLog(AnalyzeRequest analyzeRequest) {
private EntityLog getEntityLogWithoutEntries(AnalyzeRequest analyzeRequest) {
EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.info("Loaded previous entity log without entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
return entityLogWithoutEntries;
}
/**
 * Loads the complete entity log (including its entries) for the request's
 * dossier and file from redaction storage.
 */
private EntityLog getEntityLog(AnalyzeRequest analyzeRequest) {
String dossierId = analyzeRequest.getDossierId();
String fileId = analyzeRequest.getFileId();
EntityLog fullEntityLog = redactionStorageService.getEntityLog(dossierId, fileId);
log.info("Loaded full entity log for file {} in dossier {}", fileId, dossierId);
return fullEntityLog;
}
private SectionsToReanalyzeData getDictionaryIncrementAndSectionsToReanalyze(AnalyzeRequest analyzeRequest,
DictionaryIncrement dictionaryIncrement,
ReanalysisSetupData reanalysisSetupData,

View File

@ -9,6 +9,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -20,6 +21,7 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
@ -27,6 +29,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.mapper.ImportedLegalBasisMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.service.EntityLogMongoService;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
@ -57,6 +60,7 @@ public class AnalyzeService {
ComponentDroolsExecutionService componentDroolsExecutionService;
DictionarySearchService dictionarySearchService;
EntityLogCreatorService entityLogCreatorService;
EntityLogMongoService entityLogMongoService;
ComponentLogCreatorService componentLogCreatorService;
RedactionStorageService redactionStorageService;
RedactionServiceSettings redactionServiceSettings;
@ -107,8 +111,6 @@ public class AnalyzeService {
context);
}
context.setRuleVersion(initialProcessingData.kieWrapperEntityRules().rulesVersion());
ReanalysisFinalProcessingData finalProcessingData = analysisPreparationService.getReanalysisFinalProcessingData(analyzeRequest, setupData, initialProcessingData);
dictionarySearchService.addDictionaryEntities(finalProcessingData.dictionary(), initialProcessingData.sectionsToReAnalyse());
@ -119,6 +121,7 @@ public class AnalyzeService {
.collect(Collectors.toList());
// we could add the imported redactions similar to the manual redactions here as well for additional processing
context.setRuleVersion(initialProcessingData.kieWrapperEntityRules().rulesVersion());
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(initialProcessingData.kieWrapperEntityRules().container(),
setupData.document(),
initialProcessingData.sectionsToReAnalyse(),
@ -150,6 +153,49 @@ public class AnalyzeService {
}
/**
 * Re-runs only the component rules for an already analysed file, reusing the
 * stored entity log instead of re-executing the entity rules.
 * Falls back to a full analysis via {@code analyze(analyzeRequest)} when no
 * stored entity log exists yet, or when the document is missing or has zero
 * pages.
 */
@SneakyThrows
@Timed("redactmanager_reanalyzeComponentsOnly")
@Observed(name = "AnalyzeService", contextualName = "reanalyzeComponentsOnly")
public AnalyzeResult reanalyzeComponentsOnly(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
ReanalysisSetupData setupData = analysisPreparationService.getReanalysisSetupDataForComponentsOnlyReanalyze(analyzeRequest);
// ruleVersion is passed as 0: entity rules are not executed on this path.
Context context = new Context(analyzeRequest.getFileId(),
analyzeRequest.getDossierId(),
analyzeRequest.getDossierTemplateId(),
0,
analyzeRequest.getAnalysisNumber(),
TenantContext.getTenantId());
Optional<EntityLog> entityLog = entityLogMongoService.findEntityLogByDossierIdAndFileId(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
// not yet ready for reanalysis
if (entityLog.isEmpty() || setupData.document() == null || setupData.document().getNumberOfPages() == 0) {
return analyze(analyzeRequest);
}
// Only the component rule set is loaded; entity rules stay untouched.
KieWrapper kieWrapperComponentRules = analysisPreparationService.getKieWrapper(analyzeRequest, RuleFileType.COMPONENT);
// Empty new/updated entry lists: components are recomputed against the
// existing entity log as-is.
EntityLogChanges entityLogChanges = EntityLogChanges.builder()
.newEntityLogEntries(new ArrayList<>())
.updatedEntityLogEntries(new ArrayList<>())
.entityLog(entityLog.get())
.build();
// NOTE(review): the boolean argument is true here but false on the
// imported-redactions path — confirm its meaning against finalizeAnalysis.
return finalizeAnalysis(analyzeRequest,
startTime,
kieWrapperComponentRules,
entityLogChanges,
setupData.document(),
setupData.document().getNumberOfPages(),
true,
new HashSet<>(analyzeRequest.getFileAttributes()),
context);
}
@SneakyThrows
@Timed("redactmanager_analyze")
@Observed(name = "AnalyzeService", contextualName = "analyze")
@ -202,6 +248,7 @@ public class AnalyzeService {
context);
}
@Timed("redactmanager_analyzeImportedRedactionsOnly")
@Observed(name = "AnalyzeService", contextualName = "analyzeImportedRedactionsOnly")
public AnalyzeResult analyzeImportedRedactionsOnly(AnalyzeRequest analyzeRequest) {
@ -237,7 +284,14 @@ public class AnalyzeService {
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, analysisData.notFoundImportedEntries());
return finalizeAnalysis(analyzeRequest, startTime, analysisData.kieWrapperComponentRules(), entityLogChanges, analysisData.document(), analysisData.document().getNumberOfPages(), false, new HashSet<>(),
return finalizeAnalysis(analyzeRequest,
startTime,
analysisData.kieWrapperComponentRules(),
entityLogChanges,
analysisData.document(),
analysisData.document().getNumberOfPages(),
false,
new HashSet<>(),
context);
}
@ -272,13 +326,14 @@ public class AnalyzeService {
log.info("Created entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, document, addedFileAttributes, entityLog, context);
context.setRuleVersion(kieWrapperComponentRules.rulesVersion());
Optional<ComponentLog> componentLog = computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, document, addedFileAttributes, entityLog, context);
long duration = System.currentTimeMillis() - startTime;
redactmanagerAnalyzePagewiseValues.increase(numberOfPages, duration);
return AnalyzeResult.builder()
AnalyzeResult analyzeResult = AnalyzeResult.builder()
.dossierId(analyzeRequest.getDossierId())
.fileId(analyzeRequest.getFileId())
.duration(duration)
@ -296,18 +351,22 @@ public class AnalyzeService {
.addedFileAttributes(addedFileAttributes)
.usedComponentMappings(analyzeRequest.getComponentMappings())
.build();
componentLog.ifPresent(value -> analyzeResult.setDateFormatsVersion(value.getDateFormatsVersion()));
return analyzeResult;
}
private void computeComponentsWhenRulesArePresent(AnalyzeRequest analyzeRequest,
KieWrapper kieWrapperComponentRules,
Document document,
Set<FileAttribute> addedFileAttributes,
EntityLog entityLog,
Context context) {
private Optional<ComponentLog> computeComponentsWhenRulesArePresent(AnalyzeRequest analyzeRequest,
KieWrapper kieWrapperComponentRules,
Document document,
Set<FileAttribute> addedFileAttributes,
EntityLog entityLog,
Context context) {
if (!kieWrapperComponentRules.isPresent()) {
return;
return Optional.empty();
}
// We need the latest EntityLog entries for components rules execution
@ -322,11 +381,16 @@ public class AnalyzeService {
log.info("Finished component rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
ComponentLog componentLog = componentLogCreatorService.buildComponentLog(analyzeRequest.getAnalysisNumber(), components, kieWrapperComponentRules.rulesVersion());
ComponentLog componentLog = componentLogCreatorService.buildComponentLog(analyzeRequest.getAnalysisNumber(),
components,
kieWrapperComponentRules.rulesVersion(),
context.getDateFormatsVersion());
redactionStorageService.saveComponentLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), componentLog);
log.info("Stored component log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
return Optional.of(componentLog);
}
}

View File

@ -20,7 +20,7 @@ import com.iqser.red.service.redaction.v1.server.service.document.EntityComparat
@Service
public class ComponentLogCreatorService {
public ComponentLog buildComponentLog(int analysisNumber, List<Component> components, long componentRulesVersion) {
public ComponentLog buildComponentLog(int analysisNumber, List<Component> components, long componentRulesVersion, long dateFormatsVersion) {
Map<String, List<ComponentLogEntryValue>> map = new HashMap<>();
components.stream()
@ -33,7 +33,7 @@ public class ComponentLogCreatorService {
.stream()
.map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue(), entry.getValue(), false))
.toList();
return new ComponentLog(analysisNumber, componentRulesVersion, componentLogComponents);
return new ComponentLog(analysisNumber, componentRulesVersion, dateFormatsVersion, componentLogComponents);
}

View File

@ -0,0 +1,73 @@
package com.iqser.red.service.redaction.v1.server.service.components;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.redaction.v1.server.client.DateFormatsClient;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.AccessLevel;
import lombok.experimental.FieldDefaults;
/**
 * In-memory cache of {@link DateConverter} instances, keyed by
 * "tenantId/dossierTemplateId". A cached converter is reused only while its
 * stored version is at least the latest version reported by the persistence
 * service; otherwise the date formats are reloaded and the entry replaced.
 */
@Service
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class DateConverterMemoryCache {
// ConcurrentMap allows lock-free reads on the fast path; reloads are
// additionally serialized in getDateConverter below.
ConcurrentMap<String, DateConverterCacheEntry> cache = new ConcurrentHashMap<>();
DateFormatsClient dateFormatsClient;
public DateConverterMemoryCache(DateFormatsClient dateFormatsClient) {
this.dateFormatsClient = dateFormatsClient;
}
/**
 * Returns the converter for the current tenant and the given dossier
 * template, rebuilding it when the remote date-formats version is newer
 * than the cached one.
 * NOTE(review): getVersion() is invoked on every call, i.e. one remote
 * round-trip per lookup even on cache hits — confirm this is acceptable.
 */
public DateConverterCacheEntry getDateConverter(String dossierTemplateId) {
String tenantId = TenantContext.getTenantId();
String cacheKey = buildCacheKey(tenantId, dossierTemplateId);
DateConverterCacheEntry cacheEntry = cache.get(cacheKey);
long latestVersion = dateFormatsClient.getVersion(dossierTemplateId);
// Fast path: cached entry is still current for this version.
if (cacheEntry != null && cacheEntry.version() >= latestVersion) {
return cacheEntry;
}
// Double-checked locking: re-read under the lock so concurrent callers
// reload the formats only once per version bump. NOTE(review): the lock
// is instance-wide, so reloads for unrelated tenants/templates are
// serialized as well.
synchronized (this) {
cacheEntry = cache.get(cacheKey);
if (cacheEntry != null && cacheEntry.version() >= latestVersion) {
return cacheEntry;
}
DateConverter dateConverter = loadDateConverter(dossierTemplateId);
DateConverterCacheEntry dateConverterCacheEntry = new DateConverterCacheEntry(dateConverter, latestVersion);
cache.put(cacheKey, dateConverterCacheEntry);
return dateConverterCacheEntry;
}
}
// Fetches the raw date-format definitions and builds a converter from them.
private DateConverter loadDateConverter(String dossierTemplateId) {
JSONPrimitive<String> dateFormats = dateFormatsClient.getDateFormats(dossierTemplateId);
return new DateConverter(dateFormats.getValue());
}
private static String buildCacheKey(String tenantId, String dossierTemplateId) {
return tenantId + "/" + dossierTemplateId;
}
/** A cached converter together with the date-formats version it was built from. */
public record DateConverterCacheEntry(DateConverter dateConverter, long version) {
}
}

View File

@ -30,17 +30,23 @@ import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ComponentCreationService {
KieSession kieSession;
DateConverter dateConverter;
Set<Entity> referencedEntities = new HashSet<>();
public ComponentCreationService(KieSession kieSession, DateConverter dateConverter) {
this.kieSession = kieSession;
this.dateConverter = dateConverter;
}
/**
* Joins entity values, and creates a component from the result.
*
@ -399,7 +405,7 @@ public class ComponentCreationService {
for (Entity entity : entities) {
String value = entity.getValue();
Optional<Date> optionalDate = DateConverter.parseDate(value);
Optional<Date> optionalDate = dateConverter.parseDate(value);
if (optionalDate.isPresent()) {
dates.add(optionalDate.get());
} else {
@ -410,7 +416,7 @@ public class ComponentCreationService {
String formattedDateStrings = Stream.concat(//
dates.stream()
.sorted()
.map(date -> DateConverter.convertDate(date, resultFormat)), //
.map(date -> dateConverter.convertDate(date, resultFormat)), //
unparsedDates.stream())//
.collect(Collectors.joining(", "));

View File

@ -15,6 +15,7 @@ import org.kie.api.runtime.rule.QueryResults;
import org.kie.api.runtime.rule.QueryResultsRow;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.internal.resources.DateFormatsResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
@ -22,6 +23,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.client.DateFormatsClient;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.logger.ObjectTrackingEventListener;
import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
@ -31,9 +33,11 @@ import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingMemoryCache;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService;
import com.iqser.red.service.redaction.v1.server.service.components.DateConverterMemoryCache;
import com.iqser.red.service.redaction.v1.server.service.document.ComponentComparator;
import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -52,6 +56,7 @@ public class ComponentDroolsExecutionService {
RedactionServiceSettings settings;
ComponentMappingMemoryCache componentMappingMemoryCache;
DateConverterMemoryCache dateConverterMemoryCache;
WebSocketService webSocketService;
@ -63,7 +68,9 @@ public class ComponentDroolsExecutionService {
Context context) {
KieSession kieSession = kieContainer.newKieSession();
ComponentCreationService componentCreationService = new ComponentCreationService(kieSession);
DateConverterMemoryCache.DateConverterCacheEntry dateConverterCacheEntry = dateConverterMemoryCache.getDateConverter(context.getDossierTemplateId());
context.setDateFormatsVersion(dateConverterCacheEntry.version());
ComponentCreationService componentCreationService = new ComponentCreationService(kieSession, dateConverterCacheEntry.dateConverter());
ComponentMappingService componentMappingService = new ComponentMappingService(componentMappingMemoryCache, componentMappings);
RulesLogger logger = new RulesLogger(webSocketService, context);
if (settings.isDroolsDebug()) {

View File

@ -1,50 +1,64 @@
package com.iqser.red.service.redaction.v1.server.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.format.ResolverStyle;
import java.time.temporal.ChronoField;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AccessLevel;
import lombok.experimental.FieldDefaults;
import lombok.experimental.UtilityClass;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DateFormatPatternErrorMessage;
import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.DateTimeFormatterProvider;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@UtilityClass
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class DateConverter {
private static DateTimeFormatter DATE_TIME_FORMATTER;
private final DateTimeFormatter dateTimeFormatter;
private static final List<Locale> LOCALES = Arrays.asList(Locale.UK, Locale.US);
private static int BASE_YEAR = 1950; // base year 1950 means, that "yy" will be interpreted in range 1950-2049
public DateConverter() {
List<DateFormatPatternErrorMessage> errors = new ArrayList<>();
this.dateTimeFormatter = DateTimeFormatterProvider.createFormatterFromResource("/dateFormats.txt", errors);
if (!errors.isEmpty()) {
throw new RuntimeException("Errors occurred while loading date formats: " + String.join(", ",
errors.stream()
.map(DateFormatPatternErrorMessage::getMessage)
.toList()));
}
}
public DateConverter(String dateFormats) {
List<DateFormatPatternErrorMessage> errors = new ArrayList<>();
this.dateTimeFormatter = DateTimeFormatterProvider.createFormatterFromInput(dateFormats, errors);
if (!errors.isEmpty()) {
throw new RuntimeException("Errors occurred while loading date formats: " + String.join(", ",
errors.stream()
.map(DateFormatPatternErrorMessage::getMessage)
.toList()));
}
}
public Optional<Date> parseDate(String dateAsString) {
DateTimeFormatter formatter = getDateTimeFormatter();
String cleanDate = dateAsString.trim();
cleanDate = removeTrailingDot(cleanDate);
for (Locale locale : LOCALES) {
try {
return convertToDate(locale, cleanDate, formatter);
return convertToDate(locale, cleanDate, this.dateTimeFormatter);
} catch (DateTimeParseException e) {
try {
Optional<String> extractedDate = DateExtractorNatty.extractDate(cleanDate);
@ -53,7 +67,7 @@ public class DateConverter {
return Optional.empty();
} else {
cleanDate = extractedDate.get();
return convertToDate(locale, cleanDate, formatter);
return convertToDate(locale, cleanDate, this.dateTimeFormatter);
}
} catch (DateTimeParseException exception) {
log.debug("Failed to parse date: {} with locale: {}", cleanDate, locale);
@ -63,7 +77,6 @@ public class DateConverter {
log.warn("Failed to parse date: {}", cleanDate);
return Optional.empty();
}
@ -82,72 +95,12 @@ public class DateConverter {
}
private DateTimeFormatter getDateTimeFormatter() {
if (DATE_TIME_FORMATTER == null) {
DATE_TIME_FORMATTER = createFormatterFromResource();
}
return DATE_TIME_FORMATTER;
}
private DateTimeFormatter createFormatterFromResource() {
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
builder.parseCaseInsensitive();
try (BufferedReader reader = new BufferedReader(new InputStreamReader(Objects.requireNonNull(DateConverter.class.getResourceAsStream("/date_formats.txt"))))) {
String line;
while ((line = reader.readLine()) != null) {
String pattern = line.trim();
if (!pattern.isEmpty()) {
if (hasTwoDigitsForYear(pattern)) {
builder.appendOptional(setBaseYear(pattern));
} else {
builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.UK));
}
}
}
} catch (IOException e) {
throw new RuntimeException("Error reading date format file: " + e.getMessage(), e);
}
return builder.toFormatter().withResolverStyle(ResolverStyle.SMART).withLocale(Locale.UK);
}
private boolean hasTwoDigitsForYear(String input) {
// Regex to match any string with exactly two 'y' characters
Pattern pattern = Pattern.compile("^[^y]*(y[^y]*){2}$");
Matcher matcher = pattern.matcher(input);
return matcher.matches();
}
private DateTimeFormatter setBaseYear(String pattern) {
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
if (pattern.startsWith("yy")) {
String editedPattern = pattern.substring(2);
builder.appendValueReduced(ChronoField.YEAR_OF_ERA, 2, 2, BASE_YEAR).appendPattern(editedPattern).toFormatter();
} else if (pattern.endsWith("yy")) {
String editedPattern = pattern.substring(0, pattern.length() - 2);
builder.appendPattern(editedPattern).appendValueReduced(ChronoField.YEAR_OF_ERA, 2, 2, BASE_YEAR).toFormatter();
} else {
throw new RuntimeException("Date format not supported: " + pattern);
}
return builder.toFormatter();
}
private String removeTrailingDot(String dateAsString) {
String str = dateAsString;
if (str != null && !str.isEmpty() && str.charAt(str.length() - 1) == '.') {
str = str.substring(0, str.length() - 1);
if (dateAsString != null && !dateAsString.isEmpty() && dateAsString.charAt(dateAsString.length() - 1) == '.') {
return dateAsString.substring(0, dateAsString.length() - 1);
}
return str;
return dateAsString;
}
}

View File

@ -50,6 +50,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.EntityLogDocumentRepository;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.EntityLogEntryDocumentRepository;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.client.DateFormatsClient;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
@ -212,6 +213,9 @@ public abstract class AbstractRedactionIntegrationTest {
@MockBean
protected RulesClient rulesClient;
@MockBean
protected DateFormatsClient dateFormatsClient;
@MockBean
protected DictionaryClient dictionaryClient;

View File

@ -59,6 +59,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/documine_flora.drl");
private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_components.drl");
private static final String DATE_FORMATS = loadFromClassPath("dateFormats.txt");
@Test
@ -216,6 +217,8 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis());
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES));
when(dateFormatsClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
when(dateFormatsClient.getDateFormats(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(DATE_FORMATS));
loadDictionaryForTest();
loadTypeForTest();

View File

@ -8,11 +8,15 @@ import java.util.List;
import java.util.Optional;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
public class DateConverterTest {
private final DateConverter dateConverter = new DateConverter();
@Test
public void testDateConverter() {
@ -46,7 +50,7 @@ public class DateConverterTest {
"31 August 2018 (animal 1)");
for (String dateStr : goldenStandardDates) {
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
Optional<Date> parsedDate = dateConverter.parseDate(dateStr);
assertTrue(parsedDate.isPresent(), "Failed to parse date: " + dateStr);
}
}

View File

@ -0,0 +1,195 @@
package com.iqser.red.service.redaction.v1.server.service.components.mappings;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.util.Optional;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.redaction.v1.server.client.DateFormatsClient;
import com.iqser.red.service.redaction.v1.server.service.components.DateConverterMemoryCache;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class DateConverterMemoryCacheTest {

	@Mock
	private DateFormatsClient mockDateFormatsClient;

	private DateConverterMemoryCache dateConverterMemoryCache;

	// Handle returned by openMocks(this); must be closed after each test so
	// Mockito releases the inline mock resources (per Mockito's openMocks contract).
	private AutoCloseable mocks;

	private final String tenantId = "tenant-id";
	private final String dossierTemplateId = "dossier-template-id";

	private final String initialDateFormats = """
			dd/MM/yyyy
			MM-dd-yyyy
			""";
	private final String updatedDateFormats = "yyyy.MM.dd";

	@BeforeEach
	void setUp() {
		mocks = MockitoAnnotations.openMocks(this);
		dateConverterMemoryCache = new DateConverterMemoryCache(mockDateFormatsClient);
		TenantContext.clear();
	}

	@AfterEach
	void tearDown() throws Exception {
		// Clear the tenant set by the test so no tenant state leaks to other
		// test classes running on the same thread, and release the mocks.
		TenantContext.clear();
		mocks.close();
	}

	/**
	 * First lookup loads the formats from the client; a second lookup with an
	 * unchanged version must be served from the cache (same instance, no extra fetch).
	 */
	@Test
	void testGetDateConverter_CachesSuccessfully() {
		TenantContext.setTenantId(tenantId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(initialDateFormats));
		DateConverter converter1 = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertNotNull(converter1);
		verify(mockDateFormatsClient, times(1)).getDateFormats(dossierTemplateId);
		DateConverter converter2 = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertSame(converter1, converter2);
		verify(mockDateFormatsClient, times(1)).getDateFormats(dossierTemplateId);
	}

	/**
	 * When the client reports a newer version, the cache must re-fetch the
	 * formats and hand out a fresh converter instance.
	 */
	@Test
	void testGetDateConverter_UpdatesCacheOnVersionChange() {
		TenantContext.setTenantId(tenantId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(initialDateFormats));
		DateConverter converter1 = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertNotNull(converter1);
		verify(mockDateFormatsClient, times(1)).getVersion(dossierTemplateId);
		verify(mockDateFormatsClient, times(1)).getDateFormats(dossierTemplateId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(2L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(updatedDateFormats));
		DateConverter converter2 = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertNotNull(converter2);
		assertNotSame(converter1, converter2);
		verify(mockDateFormatsClient, times(2)).getDateFormats(dossierTemplateId);
	}

	/**
	 * The cache key must include the tenant: the same dossier template id under
	 * two different tenants yields two independent converter instances.
	 */
	@Test
	void testGetDateConverter_TenantSeparation() {
		String otherTenantId = "other-tenant-id";
		TenantContext.setTenantId(tenantId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(initialDateFormats));
		DateConverter converterTenant1 = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertNotNull(converterTenant1);
		verify(mockDateFormatsClient, times(1)).getVersion(dossierTemplateId);
		verify(mockDateFormatsClient, times(1)).getDateFormats(dossierTemplateId);
		TenantContext.setTenantId(otherTenantId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(updatedDateFormats));
		DateConverter converterTenant2 = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertNotNull(converterTenant2);
		assertNotSame(converterTenant1, converterTenant2);
		verify(mockDateFormatsClient, times(2)).getVersion(dossierTemplateId);
		verify(mockDateFormatsClient, times(2)).getDateFormats(dossierTemplateId);
	}

	/**
	 * Concurrent first-time lookups must all observe the same cached instance
	 * and trigger exactly one fetch from the client.
	 */
	@Test
	void testGetDateConverter_ConcurrentAccess() throws InterruptedException {
		TenantContext.setTenantId(tenantId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(initialDateFormats));
		int threadCount = 10;
		Thread[] threads = new Thread[threadCount];
		DateConverter[] converters = new DateConverter[threadCount];
		for (int i = 0; i < threadCount; i++) {
			final int index = i;
			threads[i] = new Thread(() -> {
				// NOTE(review): TenantContext is assumed to be inherited by or
				// irrelevant to child threads here — confirm against its implementation.
				converters[index] = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
			});
			threads[i].start();
		}
		for (Thread thread : threads) {
			// join() establishes happens-before, so reads of converters[] below are safe
			thread.join();
		}
		for (int i = 1; i < threadCount; i++) {
			assertSame(converters[0], converters[i]);
		}
		verify(mockDateFormatsClient, times(1)).getDateFormats(dossierTemplateId);
	}

	/**
	 * Unparseable format definitions must surface as a RuntimeException whose
	 * message identifies the loading failure; the fetch still happens exactly once.
	 */
	@Test
	void testGetDateConverter_InvalidDateFormats_ThrowsException() {
		TenantContext.setTenantId(tenantId);
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>("invalid-date-format"));
		RuntimeException exception = assertThrows(RuntimeException.class, () -> {
			dateConverterMemoryCache.getDateConverter(dossierTemplateId);
		});
		assertTrue(exception.getMessage().contains("Errors occurred while loading date formats"));
		verify(mockDateFormatsClient, times(1)).getDateFormats(dossierTemplateId);
	}

	/**
	 * A converter built from configured patterns parses dates matching any of
	 * the patterns and rejects input matching none.
	 */
	@Test
	void testGetDateConverter_ParseDateSuccessfully() {
		TenantContext.setTenantId(tenantId);
		String dateFormats = """
				dd/MM/yyyy
				MM-dd-yyyy
				""";
		when(mockDateFormatsClient.getVersion(dossierTemplateId)).thenReturn(1L);
		when(mockDateFormatsClient.getDateFormats(dossierTemplateId)).thenReturn(new JSONPrimitive<>(dateFormats));
		DateConverter converter = dateConverterMemoryCache.getDateConverter(dossierTemplateId).dateConverter();
		assertNotNull(converter);
		Optional<java.util.Date> parsedDate = converter.parseDate("25/12/2023");
		assertTrue(parsedDate.isPresent());
		parsedDate = converter.parseDate("12-25-2023");
		assertTrue(parsedDate.isPresent());
		parsedDate = converter.parseDate("invalid-date");
		assertFalse(parsedDate.isPresent());
	}
}