RED-9159 - Improve date conversion #404
@ -65,6 +65,8 @@ dependencies {
|
||||
|
||||
implementation("org.reflections:reflections:0.10.2")
|
||||
|
||||
implementation("com.joestelmach:natty:0.13")
|
||||
|
||||
testImplementation(project(":rules-management"))
|
||||
testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}")
|
||||
testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
|
||||
|
||||
@ -11,7 +11,9 @@ import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.time.format.ResolverStyle;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
@ -27,6 +29,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class DateConverter {
|
||||
|
||||
private static DateTimeFormatter DATE_TIME_FORMATTER;
|
||||
private static final List<Locale> LOCALES = Arrays.asList(Locale.UK, Locale.US);
|
||||
|
||||
|
||||
public Optional<Date> parseDate(String dateAsString) {
|
||||
@ -34,15 +37,29 @@ public class DateConverter {
|
||||
DateTimeFormatter formatter = getDateTimeFormatter();
|
||||
String cleanDate = dateAsString.trim();
|
||||
cleanDate = removeTrailingDot(cleanDate);
|
||||
try {
|
||||
LocalDate localDate = LocalDate.parse(cleanDate, formatter);
|
||||
Date date = Date.from(localDate.atStartOfDay(ZoneId.systemDefault()).toInstant());
|
||||
return Optional.of(date);
|
||||
} catch (DateTimeParseException e) {
|
||||
log.warn("Failed to parse date: {}", cleanDate);
|
||||
return Optional.empty();
|
||||
|
||||
for (Locale locale : LOCALES) {
|
||||
try {
|
||||
return convertToDate(locale, cleanDate, formatter);
|
||||
} catch (DateTimeParseException e) {
|
||||
try {
|
||||
Optional<String> extractedDate = DateExtractorNatty.extractDate(cleanDate);
|
||||
if (extractedDate.isEmpty()) {
|
||||
log.warn("Failed to extract a valid date from value: {}", cleanDate);
|
||||
return Optional.empty();
|
||||
} else {
|
||||
cleanDate = extractedDate.get();
|
||||
return convertToDate(locale, cleanDate, formatter);
|
||||
}
|
||||
} catch (DateTimeParseException exception) {
|
||||
log.debug("Failed to parse date: {} with locale: {}", cleanDate, locale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.warn("Failed to parse date: {}", cleanDate);
|
||||
return Optional.empty();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -53,6 +70,14 @@ public class DateConverter {
|
||||
}
|
||||
|
||||
|
||||
private Optional<Date> convertToDate(Locale locale, String cleanDate, DateTimeFormatter formatter) {
|
||||
|
||||
LocalDate localDate = LocalDate.parse(cleanDate, formatter.withLocale(locale));
|
||||
Date date = Date.from(localDate.atStartOfDay(ZoneId.systemDefault()).toInstant());
|
||||
return Optional.of(date);
|
||||
}
|
||||
|
||||
|
||||
private DateTimeFormatter getDateTimeFormatter() {
|
||||
|
||||
if (DATE_TIME_FORMATTER == null) {
|
||||
@ -65,13 +90,17 @@ public class DateConverter {
|
||||
private DateTimeFormatter createFormatterFromResource() {
|
||||
|
||||
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
|
||||
builder.parseCaseInsensitive();
|
||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(Objects.requireNonNull(DateConverter.class.getResourceAsStream("/date_formats.txt"))))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
builder.appendOptional(DateTimeFormatter.ofPattern(line.trim(), Locale.UK));
|
||||
String pattern = line.trim();
|
||||
if (!pattern.isEmpty()) {
|
||||
builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.UK));
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error reading date format file: " + e.getMessage());
|
||||
throw new RuntimeException("Error reading date format file: " + e.getMessage(), e);
|
||||
}
|
||||
return builder.toFormatter().withResolverStyle(ResolverStyle.SMART).withLocale(Locale.UK);
|
||||
}
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.joestelmach.natty.DateGroup;
|
||||
import com.joestelmach.natty.Parser;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class DateExtractorNatty {
|
||||
|
||||
public Optional<String> extractDate(String text) {
|
||||
|
||||
Parser parser = new Parser();
|
||||
List<DateGroup> groups = parser.parse(text);
|
||||
if (!groups.isEmpty()) {
|
||||
DateGroup group = groups.get(0);
|
||||
String dateText = group.getText();
|
||||
return Optional.of(dateText);
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,6 +1,7 @@
|
||||
dd-MMM-yyyy
|
||||
dd MMM yyyy
|
||||
dd MMM yy
|
||||
d MMM yy
|
||||
dd MM yyyy
|
||||
dd MMMM yyyy
|
||||
MMMM dd, yyyy
|
||||
@ -34,5 +35,6 @@ dd.MMM.yy
|
||||
dd.MMMM.yy
|
||||
dd.MMM-yyyy
|
||||
dd.MMMM-yyyy
|
||||
dd. MMM yy
|
||||
d['th']['st']['nd']['rd'] MMMM yy
|
||||
d['th']['st']['nd']['rd'] MMMM yyyy
|
||||
@ -0,0 +1,54 @@
|
||||
package com.iqser.red.service.redaction.v1.server.date;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
||||
|
||||
public class DateConverterTest {
|
||||
|
||||
@Test
|
||||
public void testDateConverter() {
|
||||
|
||||
List<String> goldenStandardDates = Arrays.asList("3 Jun 08",
|
||||
"09. Apr 09",
|
||||
"07-Sep-2010",
|
||||
"26-FEB-2008",
|
||||
"30-APR-2008",
|
||||
"30-apr-2008",
|
||||
"30-Apr-2008",
|
||||
"1 Apr 08",
|
||||
"26-FEB-2008",
|
||||
"19-MAR-2008",
|
||||
"1 Apr 08",
|
||||
"27-MAR-2008",
|
||||
"06-MAY-2008",
|
||||
"3 Apr 08",
|
||||
"12-MAR-2008",
|
||||
"08-APR-2008",
|
||||
"1 Apr 08",
|
||||
"4 Apr 08",
|
||||
"13 November 2017 (animal 1)",
|
||||
"16 November 2017 (animal 1)",
|
||||
"27 March 2018 (animal 1 - 5000 mg/kg bw)",
|
||||
"10 April 2018 (animal 1 - 5000 mg/kg bw)",
|
||||
"13 November 2017 (animal 1)",
|
||||
"16 November 2017 (animal 1)",
|
||||
"28 March 2018 (animal 1 - 5000 mg/kg bw)",
|
||||
"28 March 2018 (animal1 - 5000 mg/kg bw)",
|
||||
"28 August 2018 (animal 1)",
|
||||
"31 August 2018 (animal 1)");
|
||||
|
||||
for (String dateStr : goldenStandardDates) {
|
||||
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
|
||||
assertTrue(parsedDate.isPresent(), "Failed to parse date: " + dateStr);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,28 @@
|
||||
package com.iqser.red.service.redaction.v1.server.date;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.utils.DateExtractorNatty;
|
||||
|
||||
public class DateExtractorNattyTest {
|
||||
|
||||
@Test
|
||||
public void testExtractDate() {
|
||||
|
||||
String[] testStrings = {"13 November 2017 (animal 1)", "16 November 2017 (animal 1)", "27 March 2018 (animal 1 - 5000 mg/kg bw)", "10 April 2018 (animal 1 - 5000 mg/kg bw)", "13 November 2017 (animal 1)", "16 November 2017 (animal 1)", "28 March 2018 (animal 1 - 5000 mg/kg bw)", "28 March 2018 (animal1 - 5000 mg/kg bw)", "28 August 2018 (animal 1)", "31 August 2018 (animal 1)"};
|
||||
|
||||
String[] expectedDates = {"13 November 2017", "16 November 2017", "27 March 2018", "10 April 2018", "13 November 2017", "16 November 2017", "28 March 2018", "28 March 2018", "28 August 2018", "31 August 2018"};
|
||||
|
||||
for (int i = 0; i < testStrings.length; i++) {
|
||||
Optional<String> extractedDate = DateExtractorNatty.extractDate(testStrings[i]);
|
||||
assertTrue(extractedDate.isPresent(), "No date found in: " + testStrings[i]);
|
||||
assertEquals(expectedDates[i], extractedDate.get(), "Failed to extract correct date from: " + testStrings[i]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user