Merge branch 'RED-8650' into 'master'

RED-8650 - Add support for more date formats

Closes RED-8650

See merge request redactmanager/redaction-service!373
This commit is contained in:
Andrei Isvoran 2024-04-15 14:55:12 +02:00
commit 167f27138d
4 changed files with 131 additions and 24 deletions

View File

@ -1,12 +1,23 @@
package com.iqser.red.service.redaction.v1.server.utils; package com.iqser.red.service.redaction.v1.server.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat; import java.text.DateFormat;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.format.ResolverStyle;
import java.util.Date; import java.util.Date;
import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Optional; import java.util.Optional;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.experimental.FieldDefaults; import lombok.experimental.FieldDefaults;
import lombok.experimental.UtilityClass; import lombok.experimental.UtilityClass;
@ -17,39 +28,66 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class DateConverter { public class DateConverter {
static List<SimpleDateFormat> formats = List.of(new SimpleDateFormat("dd MMM yy", Locale.ENGLISH), private static DateTimeFormatter DATE_TIME_FORMATTER;
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
public Optional<Date> parseDate(String dateAsString) { public Optional<Date> parseDate(String dateAsString) {
Date date = null; DateTimeFormatter formatter = getDateTimeFormatter("date_formats.txt");
for (SimpleDateFormat format : formats) { String cleanDate = dateAsString.trim();
cleanDate = removeTrailingDot(cleanDate);
try { try {
date = format.parse(dateAsString); LocalDate localDate = LocalDate.parse(cleanDate, formatter);
break; Date date = Date.from(localDate.atStartOfDay(ZoneId.systemDefault()).toInstant());
} catch (Exception e) { return Optional.of(date);
log.warn("Failed to parse date from string {}. \n{}", dateAsString, e.getMessage()); } catch (DateTimeParseException e) {
// ignore, try next... log.warn("Failed to parse date: {}", cleanDate);
}
}
if (date == null) {
return Optional.empty(); return Optional.empty();
} }
return Optional.of(date);
} }
public String convertDate(Date date, String resultFormat) { public String convertDate(Date date, String resultFormat) {
DateFormat resultDateFormat = new SimpleDateFormat(resultFormat, Locale.ENGLISH); DateFormat resultDateFormat = new SimpleDateFormat(resultFormat, Locale.ENGLISH);
return resultDateFormat.format(date); return resultDateFormat.format(date);
} }
/**
 * Returns the shared date formatter, building it on first use.
 * <p>
 * The formatter is created from {@code resourcePath} only while the cached
 * {@code DATE_TIME_FORMATTER} is still {@code null}; once it has been set, the
 * cached instance is returned and {@code resourcePath} is not consulted again.
 *
 * @param resourcePath classpath location of the pattern file used to build the formatter
 * @return the cached formatter
 */
private DateTimeFormatter getDateTimeFormatter(String resourcePath) {

    // Already built by an earlier call: hand back the cached instance.
    if (DATE_TIME_FORMATTER != null) {
        return DATE_TIME_FORMATTER;
    }

    DATE_TIME_FORMATTER = createFormatterFromResource(resourcePath);
    return DATE_TIME_FORMATTER;
}
/**
 * Builds one formatter that accepts every date pattern listed in a classpath resource.
 * <p>
 * The resource is read line by line; each non-blank line is trimmed, compiled as a
 * {@link DateTimeFormatter} pattern with {@link Locale#ENGLISH} and appended to the
 * builder as an optional section. The resulting formatter uses
 * {@link ResolverStyle#SMART} and {@link Locale#ENGLISH}.
 *
 * @param resourcePath classpath location of the pattern file (one pattern per line)
 * @return a formatter combining all patterns of the file as optional sections
 * @throws RuntimeException         if the resource cannot be read; the original
 *                                  {@link IOException} is attached as the cause
 * @throws IllegalArgumentException if a line is not a valid pattern (thrown by
 *                                  {@link DateTimeFormatter#ofPattern(String, Locale)})
 */
private DateTimeFormatter createFormatterFromResource(String resourcePath) {

    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
    Resource resource = new ClassPathResource(resourcePath);

    // Decode explicitly as UTF-8 so the patterns do not depend on the platform default charset.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            String pattern = line.trim();
            // Blank lines (e.g. a trailing newline in the file) carry no pattern.
            if (pattern.isEmpty()) {
                continue;
            }
            builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.ENGLISH));
        }
    } catch (IOException e) {
        // Keep the original exception as cause so the stack trace is not lost.
        throw new RuntimeException("Error reading date format file: " + e.getMessage(), e);
    }

    return builder.toFormatter().withResolverStyle(ResolverStyle.SMART).withLocale(Locale.ENGLISH);
}
/**
 * Strips a single trailing {@code '.'} from the given text.
 * <p>
 * Only the very last character is examined, so at most one dot is removed and
 * nothing else is altered. {@code null}, the empty string and text that does not
 * end in a dot are returned as they are.
 *
 * @param dateAsString the text to clean, may be {@code null}
 * @return the text without its final dot, or the input itself if there is none
 */
private String removeTrailingDot(String dateAsString) {

    // Nothing to strip: null input or no dot at the very end.
    if (dateAsString == null || !dateAsString.endsWith(".")) {
        return dateAsString;
    }

    return dateAsString.substring(0, dateAsString.length() - 1);
}
} }

View File

@ -0,0 +1,38 @@
dd MMM yyyy
dd MMM yy
dd MM yyyy
dd MMMM yyyy
MMMM dd, yyyy
dd-MMM-yyyy
dd.MM.yyyy
yyyy/MM/dd
yyyy-MM-dd
dd-MM-yyyy
MMMM d, yyyy
d MMMM yyyy
MMM d, yyyy
d['.'] MMM yyyy
d-MMM-yyyy
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
MMMM d['th']['st']['nd']['rd'], yyyy
yyyy, MMMM d
yyyy.MM.dd
yyyyMMdd
dd-MM-yy
dd/MM/yy
MMMM d, yy
d MMMM, yy
d['th']['st']['nd']['rd'] MMM yyyy
MMM d['th']['st']['nd']['rd'], yy
yyyy-MMM-dd
MMM-dd-yyyy
dd-MM-yyyy
yyyy, MMMM dd
dd.MMM.yyyy
dd.MMMM.yyyy
dd.MMM.yy
dd.MMMM.yy
dd.MMM-yyyy
dd.MMMM-yyyy
d['th']['st']['nd']['rd'] MMMM yy
d['th']['st']['nd']['rd'] MMMM yyyy

View File

@ -1,14 +1,13 @@
package com.iqser.red.service.redaction.v1.server; package com.iqser.red.service.redaction.v1.server;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -30,10 +29,12 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequ
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService; import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
@ -136,6 +137,36 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
} }
/**
 * Analyzes {@code files/dates/date_formats.pdf} and checks the
 * {@code Experimental_Starting_Date} component of the resulting component log.
 * <p>
 * The first component value is split on {@code ", "} and every resulting entry
 * must be exactly {@code 10/01/2022}.
 */
@Test
public void testConvertingVariousDateFormats() {

    AnalyzeRequest request = uploadFileToStorage("files/dates/date_formats.pdf");
    System.out.println("Start Full integration test");
    analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
    System.out.println("Finished structure analysis");
    analyzeService.analyze(request);
    System.out.println("Finished analysis");

    var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
    assertNotNull(componentLog);

    // Fail with a descriptive message instead of a bare NoSuchElementException when the component is missing.
    var experimentalDates = componentLog.getComponentLogEntries()
            .stream()
            .filter(c -> c.getName().equals("Experimental_Starting_Date"))
            .findFirst()
            .orElseThrow(() -> new AssertionError("Component 'Experimental_Starting_Date' not found in component log"));

    String dates = experimentalDates.getComponentValues().get(0).getValue();
    assertNotNull(dates);

    // Check each entry on its own so a failure names the offending value.
    for (String date : dates.split(", ")) {
        assertTrue("10/01/2022".equals(date), "Unexpected converted date: " + date);
    }
}
@Configuration @Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@Import(LayoutParsingServiceProcessorConfiguration.class) @Import(LayoutParsingServiceProcessorConfiguration.class)