RED-8650 - Support more date formats #377
@ -1,10 +1,19 @@
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.time.format.ResolverStyle;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -17,39 +26,65 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class DateConverter {
|
||||
|
||||
static List<SimpleDateFormat> formats = List.of(new SimpleDateFormat("dd MMM yy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
|
||||
private static DateTimeFormatter DATE_TIME_FORMATTER;
|
||||
|
||||
|
||||
public Optional<Date> parseDate(String dateAsString) {
|
||||
|
||||
Date date = null;
|
||||
for (SimpleDateFormat format : formats) {
|
||||
|
||||
try {
|
||||
date = format.parse(dateAsString);
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to parse date from string {}. \n{}", dateAsString, e.getMessage());
|
||||
// ignore, try next...
|
||||
}
|
||||
}
|
||||
if (date == null) {
|
||||
DateTimeFormatter formatter = getDateTimeFormatter();
|
||||
String cleanDate = dateAsString.trim();
|
||||
cleanDate = removeTrailingDot(cleanDate);
|
||||
try {
|
||||
LocalDate localDate = LocalDate.parse(cleanDate, formatter);
|
||||
Date date = Date.from(localDate.atStartOfDay(ZoneId.systemDefault()).toInstant());
|
||||
return Optional.of(date);
|
||||
} catch (DateTimeParseException e) {
|
||||
log.warn("Failed to parse date: {}", cleanDate);
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(date);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public String convertDate(Date date, String resultFormat) {
|
||||
|
||||
DateFormat resultDateFormat = new SimpleDateFormat(resultFormat, Locale.ENGLISH);
|
||||
|
||||
return resultDateFormat.format(date);
|
||||
}
|
||||
|
||||
|
||||
private DateTimeFormatter getDateTimeFormatter() {
|
||||
|
||||
if (DATE_TIME_FORMATTER == null) {
|
||||
DATE_TIME_FORMATTER = createFormatterFromResource();
|
||||
}
|
||||
return DATE_TIME_FORMATTER;
|
||||
}
|
||||
|
||||
|
||||
private DateTimeFormatter createFormatterFromResource() {
|
||||
|
||||
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(Objects.requireNonNull(DateConverter.class.getResourceAsStream("/date_formats.txt"))))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
builder.appendOptional(DateTimeFormatter.ofPattern(line.trim(), Locale.ENGLISH));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error reading date format file: " + e.getMessage());
|
||||
}
|
||||
return builder.toFormatter().withResolverStyle(ResolverStyle.SMART).withLocale(Locale.ENGLISH);
|
||||
}
|
||||
|
||||
|
||||
private String removeTrailingDot(String dateAsString) {
|
||||
|
||||
String str = dateAsString;
|
||||
if (str != null && !str.isEmpty() && str.charAt(str.length() - 1) == '.') {
|
||||
str = str.substring(0, str.length() - 1);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,38 @@
|
||||
dd MMM yyyy
|
||||
dd MMM yy
|
||||
dd MM yyyy
|
||||
dd MMMM yyyy
|
||||
MMMM dd, yyyy
|
||||
dd-MMM-yyyy
|
||||
dd.MM.yyyy
|
||||
yyyy/MM/dd
|
||||
yyyy-MM-dd
|
||||
dd-MM-yyyy
|
||||
MMMM d, yyyy
|
||||
d MMMM yyyy
|
||||
MMM d, yyyy
|
||||
d['.'] MMM yyyy
|
||||
d-MMM-yyyy
|
||||
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
||||
MMMM d['th']['st']['nd']['rd'], yyyy
|
||||
yyyy, MMMM d
|
||||
yyyy.MM.dd
|
||||
yyyyMMdd
|
||||
dd-MM-yy
|
||||
dd/MM/yy
|
||||
MMMM d, yy
|
||||
d MMMM, yy
|
||||
d['th']['st']['nd']['rd'] MMM yyyy
|
||||
MMM d['th']['st']['nd']['rd'], yy
|
||||
yyyy-MMM-dd
|
||||
MMM-dd-yyyy
|
||||
dd-MM-yyyy
|
||||
yyyy, MMMM dd
|
||||
dd.MMM.yyyy
|
||||
dd.MMMM.yyyy
|
||||
dd.MMM.yy
|
||||
dd.MMMM.yy
|
||||
dd.MMM-yyyy
|
||||
dd.MMMM-yyyy
|
||||
d['th']['st']['nd']['rd'] MMMM yy
|
||||
d['th']['st']['nd']['rd'] MMMM yyyy
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
@ -106,6 +108,36 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testConvertingVariousDateFormats() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/dates/date_formats.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
analyzeService.analyze(request);
|
||||
System.out.println("Finished analysis");
|
||||
|
||||
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertNotNull(componentLog);
|
||||
|
||||
var experimentalDates = componentLog.getComponentLogEntries().stream().filter(c -> c.getName().equals("Experimental_Starting_Date")).findFirst().get();
|
||||
assertNotNull(experimentalDates);
|
||||
|
||||
String dates = experimentalDates.getComponentValues().get(0).getValue();
|
||||
String[] dateArray = dates.split(", ");
|
||||
boolean allEqual = true;
|
||||
for (String date : dateArray) {
|
||||
if (!"10/01/2022".equals(date)) {
|
||||
allEqual = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue(allEqual);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
// @Disabled
|
||||
public void testTopOfPage13InNotHeader() throws IOException {
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user