diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/DateConverter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/DateConverter.java index 8d7435a7..dd8b5618 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/DateConverter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/DateConverter.java @@ -11,12 +11,15 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.format.DateTimeParseException; import java.time.format.ResolverStyle; +import java.time.temporal.ChronoField; import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.Locale; import java.util.Objects; import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import lombok.AccessLevel; import lombok.experimental.FieldDefaults; @@ -30,6 +33,7 @@ public class DateConverter { private static DateTimeFormatter DATE_TIME_FORMATTER; private static final List LOCALES = Arrays.asList(Locale.UK, Locale.US); + private static int BASE_YEAR = 1950; // base year 1950 means, that "yy" will be interpreted in range 1950-2049 public Optional parseDate(String dateAsString) { @@ -96,7 +100,11 @@ public class DateConverter { while ((line = reader.readLine()) != null) { String pattern = line.trim(); if (!pattern.isEmpty()) { - builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.UK)); + if (hasTwoDigitsForYear(pattern)) { + builder.appendOptional(setBaseYear(pattern)); + } else { + builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.UK)); + } } } } catch (IOException e) { @@ -106,6 +114,32 @@ public class DateConverter { } + private boolean hasTwoDigitsForYear(String input) { + // Regex to match any string with exactly two 'y' characters + Pattern pattern = Pattern.compile("^[^y]*(y[^y]*){2}$"); + Matcher matcher = pattern.matcher(input); + + return matcher.matches(); + + } + + + private DateTimeFormatter setBaseYear(String pattern) { + + DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); + if (pattern.startsWith("yy")) { + String editedPattern = pattern.substring(2); + builder.appendValueReduced(ChronoField.YEAR_OF_ERA, 2, 2, BASE_YEAR).appendPattern(editedPattern).toFormatter(); + } else if (pattern.endsWith("yy")) { + String editedPattern = pattern.substring(0, pattern.length() - 2); + builder.appendPattern(editedPattern).appendValueReduced(ChronoField.YEAR_OF_ERA, 2, 2, BASE_YEAR).toFormatter(); + } else { + throw new RuntimeException("Date format not supported: " + pattern); + } + return builder.toFormatter(); + } + + private String removeTrailingDot(String dateAsString) { String str = dateAsString; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/date_formats.txt b/redaction-service-v1/redaction-service-server-v1/src/main/resources/date_formats.txt index e0b40b5a..fedcb8b6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/date_formats.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/date_formats.txt @@ -1,41 +1,74 @@ dd-MMM-yyyy +d-MMM-yyyy dd MMM yyyy +d MMM yyyy dd MMM yy d MMM yy dd MM yyyy +d MM yyyy dd MMMM yyyy -MMMM dd, yyyy -dd.MM.yyyy -yyyy/MM/dd -yyyy-MM-dd -dd-MM-yyyy -MMMM d, yyyy d MMMM yyyy +MMMM dd, yyyy +MMMM d, yyyy +dd.MM.yyyy +d.MM.yyyy +yyyy/MM/dd +yyyy/MM/d +yyyy-MM-dd +yyyy-MM-d +dd-MM-yyyy +d-MM-yyyy +MMM dd, yyyy MMM d, yyyy +dd['.'] MMM yyyy d['.'] MMM yyyy -d-MMM-yyyy +dd['th']['st']['nd']['rd'] 'of' MMMM, yyyy d['th']['st']['nd']['rd'] 'of' MMMM, yyyy +MMMM dd['th']['st']['nd']['rd'], yyyy MMMM d['th']['st']['nd']['rd'], yyyy +yyyy, MMMM dd yyyy, MMMM d yyyy.MM.dd +yyyy.MM.d yyyyMMdd +yyyyMMd dd-MM-yy +d-MM-yy dd/MM/yy +d/MM/yy +MMMM dd, yy MMMM d, yy +dd MMMM, yy d MMMM, yy +dd['th']['st']['nd']['rd'] MMM yyyy d['th']['st']['nd']['rd'] MMM yyyy +MMM dd['th']['st']['nd']['rd'], yy MMM d['th']['st']['nd']['rd'], yy yyyy-MMM-dd +yyyy-MMM-d MMM-dd-yyyy -dd-MM-yyyy -yyyy, MMMM dd +MMM-d-yyyy dd.MMM.yyyy +d.MMM.yyyy dd.MMMM.yyyy +d.MMMM.yyyy dd.MMM.yy +d.MMM.yy dd.MMMM.yy +d.MMMM.yy dd.MMM-yyyy +d.MMM-yyyy dd.MMMM-yyyy +d.MMMM-yyyy dd. MMM yy +d. MMM yy +dd['th']['st']['nd']['rd'] MMMM yy d['th']['st']['nd']['rd'] MMMM yy +dd['th']['st']['nd']['rd'] MMMM yyyy d['th']['st']['nd']['rd'] MMMM yyyy -dd.MM.yy \ No newline at end of file +dd.MM.yy +d.MM.yy +dd MMM. yyyy +d MMM. yyyy +yyyy MMM dd +yyyy MMM d \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index 739c6c30..cf1f7e98 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.when; import java.io.FileOutputStream; import java.io.IOException; import java.time.OffsetDateTime; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -165,12 +166,19 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { .get(); assertNotNull(experimentalDates); + List expectedDates = new ArrayList<>(); + expectedDates.add("10/01/2022"); + expectedDates.add("08/09/2024"); + expectedDates.add("03/01/2022"); + expectedDates.add("03/08/1992"); + expectedDates.add("27/02/1992"); + String dates = experimentalDates.getComponentValues() .get(0).getValue(); String[] dateArray = dates.split(", "); boolean allEqual = true; for (String date : dateArray) { - if (!"10/01/2022".equals(date) && !"08/09/2024".equals(date)) { + if (!expectedDates.contains(date)) { allEqual = false; break; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/dates/date_formats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/dates/date_formats.pdf index 00abd942..89853993 100644 Binary files a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/dates/date_formats.pdf and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/dates/date_formats.pdf differ