RED-8650 - Support more date formats #377

Merged
andrei.isvoran.ext merged 1 commit from RED-8650-bp into release/4.190.x 2024-04-17 14:54:56 +02:00
4 changed files with 126 additions and 21 deletions

View File

@ -1,10 +1,19 @@
package com.iqser.red.service.redaction.v1.server.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.format.ResolverStyle;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import lombok.AccessLevel;
@ -17,39 +26,65 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class DateConverter {
// Fixed list of English-locale SimpleDateFormat patterns.
// NOTE(review): in this diff view these entries look like the pre-change declaration that the
// resource-driven DATE_TIME_FORMATTER below supersedes - confirm before relying on this list.
// NOTE(review): SimpleDateFormat is documented as not synchronized; sharing these instances
// across threads would need external synchronization - confirm how callers use them.
static List<SimpleDateFormat> formats = List.of(new SimpleDateFormat("dd MMM yy", Locale.ENGLISH),
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
// Cached formatter; starts out null and is assigned on first use by getDateTimeFormatter().
private static DateTimeFormatter DATE_TIME_FORMATTER;
public Optional<Date> parseDate(String dateAsString) {
Date date = null;
for (SimpleDateFormat format : formats) {
try {
date = format.parse(dateAsString);
break;
} catch (Exception e) {
log.warn("Failed to parse date from string {}. \n{}", dateAsString, e.getMessage());
// ignore, try next...
}
}
if (date == null) {
DateTimeFormatter formatter = getDateTimeFormatter();
String cleanDate = dateAsString.trim();
cleanDate = removeTrailingDot(cleanDate);
try {
LocalDate localDate = LocalDate.parse(cleanDate, formatter);
Date date = Date.from(localDate.atStartOfDay(ZoneId.systemDefault()).toInstant());
return Optional.of(date);
} catch (DateTimeParseException e) {
log.warn("Failed to parse date: {}", cleanDate);
return Optional.empty();
}
return Optional.of(date);
}
/**
 * Formats a date with the given {@link SimpleDateFormat} pattern using English locale symbols.
 *
 * @param date the date to render
 * @param resultFormat a {@link SimpleDateFormat} pattern, e.g. {@code "dd/MM/yyyy"}
 * @return the formatted date text
 */
public String convertDate(Date date, String resultFormat) {
    // A fresh formatter per call: SimpleDateFormat instances are not safe to share.
    return new SimpleDateFormat(resultFormat, Locale.ENGLISH).format(date);
}
/**
 * Returns the cached formatter, creating it via {@code createFormatterFromResource()} the first
 * time it is requested.
 *
 * @return the formatter held in {@code DATE_TIME_FORMATTER}
 */
private DateTimeFormatter getDateTimeFormatter() {
    if (DATE_TIME_FORMATTER != null) {
        return DATE_TIME_FORMATTER;
    }
    DATE_TIME_FORMATTER = createFormatterFromResource();
    return DATE_TIME_FORMATTER;
}
/**
 * Builds one composite formatter from the classpath resource {@code /date_formats.txt}.
 *
 * <p>Each non-blank line of the resource is read as a {@link DateTimeFormatter} pattern and
 * appended to the builder as an optional section, in file order.
 *
 * @return a formatter with {@link ResolverStyle#SMART} resolution and the English locale
 * @throws NullPointerException if the resource is not on the classpath
 * @throws IllegalArgumentException if a line is not a valid pattern
 * @throws RuntimeException if the resource cannot be read; the {@link IOException} is the cause
 */
private DateTimeFormatter createFormatterFromResource() {
    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();

    // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            Objects.requireNonNull(DateConverter.class.getResourceAsStream("/date_formats.txt"),
                    "Classpath resource /date_formats.txt not found"),
            StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            String pattern = line.trim();
            // Blank lines (e.g. a trailing newline) carry no pattern and are skipped.
            if (pattern.isEmpty()) {
                continue;
            }
            builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.ENGLISH));
        }
    } catch (IOException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new RuntimeException("Error reading date format file: " + e.getMessage(), e);
    }

    return builder.toFormatter().withResolverStyle(ResolverStyle.SMART).withLocale(Locale.ENGLISH);
}
/**
 * Removes at most one {@code '.'} from the end of the given text.
 *
 * @param dateAsString the text to clean; may be {@code null}
 * @return the text without its final dot, or the argument unchanged when it is {@code null},
 *         empty, or does not end with a dot
 */
private String removeTrailingDot(String dateAsString) {
    if (dateAsString == null || !dateAsString.endsWith(".")) {
        return dateAsString;
    }
    return dateAsString.substring(0, dateAsString.length() - 1);
}
}

View File

@ -0,0 +1,38 @@
dd MMM yyyy
dd MMM yy
dd MM yyyy
dd MMMM yyyy
MMMM dd, yyyy
dd-MMM-yyyy
dd.MM.yyyy
yyyy/MM/dd
yyyy-MM-dd
dd-MM-yyyy
MMMM d, yyyy
d MMMM yyyy
MMM d, yyyy
d['.'] MMM yyyy
d-MMM-yyyy
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
MMMM d['th']['st']['nd']['rd'], yyyy
yyyy, MMMM d
yyyy.MM.dd
yyyyMMdd
dd-MM-yy
dd/MM/yy
MMMM d, yy
d MMMM, yy
d['th']['st']['nd']['rd'] MMM yyyy
MMM d['th']['st']['nd']['rd'], yy
yyyy-MMM-dd
MMM-dd-yyyy
dd-MM-yyyy
yyyy, MMMM dd
dd.MMM.yyyy
dd.MMMM.yyyy
dd.MMM.yy
dd.MMMM.yy
dd.MMM-yyyy
dd.MMMM-yyyy
d['th']['st']['nd']['rd'] MMMM yy
d['th']['st']['nd']['rd'] MMMM yyyy

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when;
import java.io.FileOutputStream;
@ -106,6 +108,36 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
}
/**
 * Analyses {@code files/dates/date_formats.pdf} and checks that every value listed in the
 * {@code Experimental_Starting_Date} component is rendered as {@code 10/01/2022}.
 */
@Test
public void testConvertingVariousDateFormats() {
    AnalyzeRequest request = uploadFileToStorage("files/dates/date_formats.pdf");
    System.out.println("Start Full integration test");
    analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
    System.out.println("Finished structure analysis");
    analyzeService.analyze(request);
    System.out.println("Finished analysis");

    var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
    assertNotNull(componentLog);

    // orElse(null) instead of get(): a missing component now fails the assertion below with a
    // readable message rather than throwing NoSuchElementException before it is reached.
    var experimentalDates = componentLog.getComponentLogEntries()
            .stream()
            .filter(c -> c.getName().equals("Experimental_Starting_Date"))
            .findFirst()
            .orElse(null);
    assertNotNull(experimentalDates, "Component 'Experimental_Starting_Date' not found in component log");

    String dates = experimentalDates.getComponentValues().get(0).getValue();

    // Assert per entry so a failure names the offending value.
    for (String date : dates.split(", ")) {
        assertTrue("10/01/2022".equals(date), "Unexpected converted date: '" + date + "' in '" + dates + "'");
    }
}
@Test
// @Disabled
public void testTopOfPage13InNotHeader() throws IOException {