Compare commits
27 Commits
master
...
release/4.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
adc0f8215d | ||
|
|
401f1d50ac | ||
|
|
5cab4c2eb4 | ||
|
|
806afc40d2 | ||
|
|
2537cb97f2 | ||
|
|
1397290a0e | ||
|
|
c479df124d | ||
|
|
e5e8135bce | ||
|
|
812744fa1d | ||
|
|
04632787f1 | ||
|
|
566472bfb0 | ||
|
|
10538348dd | ||
|
|
21a515b26e | ||
|
|
e5933694f2 | ||
|
|
2a92404701 | ||
|
|
113a7c9c00 | ||
|
|
88e656f870 | ||
|
|
02c23a0a46 | ||
|
|
03c0f1a619 | ||
|
|
5da0536e11 | ||
|
|
e417eea0be | ||
|
|
b0f8d025b7 | ||
|
|
dfa6051063 | ||
|
|
d5769ced15 | ||
|
|
bc7dd601c0 | ||
|
|
913cde6f23 | ||
|
|
c18f433186 |
@ -21,3 +21,5 @@ deploy:
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
- if: $CI_COMMIT_BRANCH =~ /^release/
|
||||
- if: $CI_COMMIT_TAG
|
||||
pmd:
|
||||
allow_failure: true
|
||||
|
||||
@ -51,6 +51,8 @@ dependencies {
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
||||
implementation("org.springframework.boot:spring-boot-starter-amqp:3.1.4")
|
||||
|
||||
implementation("com.joestelmach:natty:0.13")
|
||||
|
||||
testImplementation(project(":rules-management"))
|
||||
testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}")
|
||||
testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
|
||||
|
||||
@ -40,7 +40,10 @@ public class DocumentTree {
|
||||
|
||||
public TextBlock buildTextBlock() {
|
||||
|
||||
return allEntriesInOrder().map(Entry::getNode).filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
return allEntriesInOrder().map(Entry::getNode)
|
||||
.filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -89,8 +92,8 @@ public class DocumentTree {
|
||||
if (treeId.isEmpty()) {
|
||||
return root != null;
|
||||
}
|
||||
Entry entry = root.children.get(treeId.get(0));
|
||||
for (int id : treeId.subList(1, treeId.size())) {
|
||||
Entry entry = root;
|
||||
for (int id : treeId) {
|
||||
if (id >= entry.children.size() || 0 > id) {
|
||||
return false;
|
||||
}
|
||||
@ -114,13 +117,16 @@ public class DocumentTree {
|
||||
|
||||
public Stream<SemanticNode> childNodes(List<Integer> treeId) {
|
||||
|
||||
return getEntryById(treeId).children.stream().map(Entry::getNode);
|
||||
return getEntryById(treeId).children.stream()
|
||||
.map(Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
public Stream<SemanticNode> childNodesOfType(List<Integer> treeId, NodeType nodeType) {
|
||||
|
||||
return getEntryById(treeId).children.stream().filter(entry -> entry.node.getType().equals(nodeType)).map(Entry::getNode);
|
||||
return getEntryById(treeId).children.stream()
|
||||
.filter(entry -> entry.node.getType().equals(nodeType))
|
||||
.map(Entry::getNode);
|
||||
}
|
||||
|
||||
|
||||
@ -199,26 +205,32 @@ public class DocumentTree {
|
||||
|
||||
public Stream<Entry> allEntriesInOrder() {
|
||||
|
||||
return Stream.of(root).flatMap(DocumentTree::flatten);
|
||||
return Stream.of(root)
|
||||
.flatMap(DocumentTree::flatten);
|
||||
}
|
||||
|
||||
|
||||
public Stream<Entry> allSubEntriesInOrder(List<Integer> parentId) {
|
||||
|
||||
return getEntryById(parentId).children.stream().flatMap(DocumentTree::flatten);
|
||||
return getEntryById(parentId).children.stream()
|
||||
.flatMap(DocumentTree::flatten);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return String.join("\n", allEntriesInOrder().map(Entry::toString).toList());
|
||||
return String.join("\n",
|
||||
allEntriesInOrder().map(Entry::toString)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Stream<Entry> flatten(Entry entry) {
|
||||
|
||||
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentTree::flatten));
|
||||
return Stream.concat(Stream.of(entry),
|
||||
entry.children.stream()
|
||||
.flatMap(DocumentTree::flatten));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -118,7 +118,7 @@ public class RedactionMessageReceiver {
|
||||
|
||||
private void sendAnalysisFailed(AnalyzeRequest analyzeRequest, boolean priority, Exception e) {
|
||||
|
||||
log.warn("Failed to process analyze request: {}", analyzeRequest, e);
|
||||
log.error("Failed to process analyze request: {}", analyzeRequest, e);
|
||||
var timestamp = OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
|
||||
@ -50,6 +50,10 @@ public class EntityChangeLogService {
|
||||
entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now));
|
||||
}
|
||||
}
|
||||
|
||||
if (!previousEntity.equals(entityLogEntry)) {
|
||||
hasChanges = true;
|
||||
}
|
||||
}
|
||||
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now);
|
||||
return hasChanges;
|
||||
|
||||
@ -599,6 +599,9 @@ public class EntityCreationService {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
|
||||
}
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
if (trimmedTextRange.length() == 0){
|
||||
return Optional.empty();
|
||||
}
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
|
||||
if (node.getEntities().contains(entity)) {
|
||||
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngines(engines)).findAny();
|
||||
|
||||
@ -1,11 +1,25 @@
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.time.format.ResolverStyle;
|
||||
import java.time.temporal.ChronoField;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
@ -17,39 +31,123 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class DateConverter {
|
||||
|
||||
static List<SimpleDateFormat> formats = List.of(new SimpleDateFormat("dd MMM yy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MM yyyy.", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH),
|
||||
new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH));
|
||||
private static DateTimeFormatter DATE_TIME_FORMATTER;
|
||||
private static final List<Locale> LOCALES = Arrays.asList(Locale.UK, Locale.US);
|
||||
private static int BASE_YEAR = 1950; // base year 1950 means, that "yy" will be interpreted in range 1950-2049
|
||||
|
||||
|
||||
public Optional<Date> parseDate(String dateAsString) {
|
||||
|
||||
Date date = null;
|
||||
for (SimpleDateFormat format : formats) {
|
||||
DateTimeFormatter formatter = getDateTimeFormatter();
|
||||
String cleanDate = dateAsString.trim();
|
||||
cleanDate = removeTrailingDot(cleanDate);
|
||||
|
||||
for (Locale locale : LOCALES) {
|
||||
try {
|
||||
date = format.parse(dateAsString);
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to parse date from string {}. \n{}", dateAsString, e.getMessage());
|
||||
// ignore, try next...
|
||||
return convertToDate(locale, cleanDate, formatter);
|
||||
} catch (DateTimeParseException e) {
|
||||
try {
|
||||
Optional<String> extractedDate = DateExtractorNatty.extractDate(cleanDate);
|
||||
if (extractedDate.isEmpty()) {
|
||||
log.warn("Failed to extract a valid date from value: {}", cleanDate);
|
||||
return Optional.empty();
|
||||
} else {
|
||||
cleanDate = extractedDate.get();
|
||||
return convertToDate(locale, cleanDate, formatter);
|
||||
}
|
||||
} catch (DateTimeParseException exception) {
|
||||
log.debug("Failed to parse date: {} with locale: {}", cleanDate, locale);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (date == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(date);
|
||||
|
||||
log.warn("Failed to parse date: {}", cleanDate);
|
||||
return Optional.empty();
|
||||
|
||||
}
|
||||
|
||||
|
||||
public String convertDate(Date date, String resultFormat) {
|
||||
|
||||
DateFormat resultDateFormat = new SimpleDateFormat(resultFormat, Locale.ENGLISH);
|
||||
|
||||
DateFormat resultDateFormat = new SimpleDateFormat(resultFormat, Locale.UK);
|
||||
return resultDateFormat.format(date);
|
||||
}
|
||||
|
||||
|
||||
private Optional<Date> convertToDate(Locale locale, String cleanDate, DateTimeFormatter formatter) {
|
||||
|
||||
LocalDate localDate = LocalDate.parse(cleanDate, formatter.withLocale(locale));
|
||||
Date date = Date.from(localDate.atStartOfDay(ZoneId.systemDefault()).toInstant());
|
||||
return Optional.of(date);
|
||||
}
|
||||
|
||||
|
||||
private DateTimeFormatter getDateTimeFormatter() {
|
||||
|
||||
if (DATE_TIME_FORMATTER == null) {
|
||||
DATE_TIME_FORMATTER = createFormatterFromResource();
|
||||
}
|
||||
return DATE_TIME_FORMATTER;
|
||||
}
|
||||
|
||||
|
||||
private DateTimeFormatter createFormatterFromResource() {
|
||||
|
||||
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
|
||||
builder.parseCaseInsensitive();
|
||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(Objects.requireNonNull(DateConverter.class.getResourceAsStream("/date_formats.txt"))))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
String pattern = line.trim();
|
||||
if (!pattern.isEmpty()) {
|
||||
if (hasTwoDigitsForYear(pattern)) {
|
||||
builder.appendOptional(setBaseYear(pattern));
|
||||
} else {
|
||||
builder.appendOptional(DateTimeFormatter.ofPattern(pattern, Locale.UK));
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error reading date format file: " + e.getMessage(), e);
|
||||
}
|
||||
return builder.toFormatter().withResolverStyle(ResolverStyle.SMART).withLocale(Locale.UK);
|
||||
}
|
||||
|
||||
|
||||
private boolean hasTwoDigitsForYear(String input) {
|
||||
// Regex to match any string with exactly two 'y' characters
|
||||
Pattern pattern = Pattern.compile("^[^y]*(y[^y]*){2}$");
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
|
||||
return matcher.matches();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private DateTimeFormatter setBaseYear(String pattern) {
|
||||
|
||||
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
|
||||
if (pattern.startsWith("yy")) {
|
||||
String editedPattern = pattern.substring(2);
|
||||
builder.appendValueReduced(ChronoField.YEAR_OF_ERA, 2, 2, BASE_YEAR).appendPattern(editedPattern).toFormatter();
|
||||
} else if (pattern.endsWith("yy")) {
|
||||
String editedPattern = pattern.substring(0, pattern.length() - 2);
|
||||
builder.appendPattern(editedPattern).appendValueReduced(ChronoField.YEAR_OF_ERA, 2, 2, BASE_YEAR).toFormatter();
|
||||
} else {
|
||||
throw new RuntimeException("Date format not supported: " + pattern);
|
||||
}
|
||||
return builder.toFormatter();
|
||||
}
|
||||
|
||||
|
||||
private String removeTrailingDot(String dateAsString) {
|
||||
|
||||
String str = dateAsString;
|
||||
if (str != null && !str.isEmpty() && str.charAt(str.length() - 1) == '.') {
|
||||
str = str.substring(0, str.length() - 1);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.joestelmach.natty.DateGroup;
|
||||
import com.joestelmach.natty.Parser;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class DateExtractorNatty {
|
||||
|
||||
public Optional<String> extractDate(String text) {
|
||||
|
||||
Parser parser = new Parser();
|
||||
List<DateGroup> groups = parser.parse(text);
|
||||
if (!groups.isEmpty()) {
|
||||
DateGroup group = groups.get(0);
|
||||
String dateText = group.getText();
|
||||
return Optional.of(dateText);
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
}
|
||||
@ -151,7 +151,9 @@ public class RedactionSearchUtility {
|
||||
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getTextRange()));
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
if (!matcher.group(group).isBlank()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
}
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
@ -163,7 +165,9 @@ public class RedactionSearchUtility {
|
||||
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
if (!matcher.group(group).isBlank()) {
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
}
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
@ -0,0 +1,79 @@
|
||||
yyyy MMM dd
|
||||
yyyy MMM d
|
||||
yyyy MM dd
|
||||
yyyy MM d
|
||||
dd-MMM-yyyy
|
||||
d-MMM-yyyy
|
||||
dd MMM yyyy
|
||||
d MMM yyyy
|
||||
dd MMM yy
|
||||
d MMM yy
|
||||
dd MM yyyy
|
||||
d MM yyyy
|
||||
dd MMMM yyyy
|
||||
d MMMM yyyy
|
||||
MMMM dd, yyyy
|
||||
MMMM d, yyyy
|
||||
MMMM, d yyyy
|
||||
MMMM d,yyyy
|
||||
dd.MM.yyyy
|
||||
d.MM.yyyy
|
||||
yyyy/MM/dd
|
||||
yyyy/MM/d
|
||||
yyyy-MM-dd
|
||||
yyyy-MM-d
|
||||
dd-MM-yyyy
|
||||
d-MM-yyyy
|
||||
MMM dd, yyyy
|
||||
MMM d, yyyy
|
||||
dd['.'] MMM yyyy
|
||||
d['.'] MMM yyyy
|
||||
dd['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
||||
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
||||
d['st']['nd']['rd']['th'] MMMM yyyy
|
||||
MMMM dd['th']['st']['nd']['rd'], yyyy
|
||||
MMMM d['th']['st']['nd']['rd'], yyyy
|
||||
yyyy, MMMM dd
|
||||
yyyy, MMMM d
|
||||
yyyy.MM.dd
|
||||
yyyy.MM.d
|
||||
dd-MM-yy
|
||||
d-MM-yy
|
||||
dd/MM/yy
|
||||
d/MM/yy
|
||||
MMMM dd, yy
|
||||
MMMM d, yy
|
||||
dd MMMM, yy
|
||||
d MMMM, yy
|
||||
dd['th']['st']['nd']['rd'] MMM yyyy
|
||||
d['th']['st']['nd']['rd'] MMM yyyy
|
||||
MMM dd['th']['st']['nd']['rd'], yy
|
||||
MMM d['th']['st']['nd']['rd'], yy
|
||||
yyyy-MMM-dd
|
||||
yyyy-MMM-d
|
||||
MMM-dd-yyyy
|
||||
MMM-d-yyyy
|
||||
dd.MMM.yyyy
|
||||
d.MMM.yyyy
|
||||
dd.MMMM.yyyy
|
||||
d.MMMM.yyyy
|
||||
dd.MMM.yy
|
||||
d.MMM.yy
|
||||
dd.MMMM.yy
|
||||
d.MMMM.yy
|
||||
dd.MMM-yyyy
|
||||
d.MMM-yyyy
|
||||
dd.MMMM-yyyy
|
||||
d.MMMM-yyyy
|
||||
dd. MMM yy
|
||||
d. MMM yy
|
||||
dd['th']['st']['nd']['rd'] MMMM yy
|
||||
d['th']['st']['nd']['rd'] MMMM yy
|
||||
dd['th']['st']['nd']['rd'] MMMM yyyy
|
||||
d['th']['st']['nd']['rd'] MMMM yyyy
|
||||
dd.MM.yy
|
||||
d.MM.yy
|
||||
dd MMM. yyyy
|
||||
d MMM. yyyy
|
||||
d-MMMM-yyyy
|
||||
dd-MMMM-yyyy
|
||||
@ -1,9 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
@ -106,6 +109,60 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testConvertingVariousDateFormats() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/dates/date_formats.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
analyzeService.analyze(request);
|
||||
System.out.println("Finished analysis");
|
||||
|
||||
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertNotNull(componentLog);
|
||||
|
||||
var experimentalDates = componentLog.getComponentLogEntries()
|
||||
.stream()
|
||||
.filter(c -> c.getName().equals("Experimental_Starting_Date"))
|
||||
.findFirst()
|
||||
.get();
|
||||
assertNotNull(experimentalDates);
|
||||
|
||||
List<String> expectedDates = new ArrayList<>();
|
||||
expectedDates.add("10/01/2022");
|
||||
expectedDates.add("01/01/2022");
|
||||
expectedDates.add("08/09/2024");
|
||||
expectedDates.add("03/01/2022");
|
||||
expectedDates.add("03/08/1992");
|
||||
expectedDates.add("13/08/1992");
|
||||
expectedDates.add("27/02/1992");
|
||||
expectedDates.add("27/10/1989");
|
||||
expectedDates.add("07/10/1989");
|
||||
expectedDates.add("21/08/1998");
|
||||
expectedDates.add("02/08/1998");
|
||||
expectedDates.add("01/05/1988");
|
||||
expectedDates.add("02/06/2003");
|
||||
expectedDates.add("03/09/2005");
|
||||
expectedDates.add("06/09/2005");
|
||||
expectedDates.add("17/08/2005");
|
||||
expectedDates.add("22/08/2035");
|
||||
|
||||
String dates = experimentalDates.getComponentValues()
|
||||
.get(0).getValue();
|
||||
String[] dateArray = dates.split(", ");
|
||||
boolean allEqual = true;
|
||||
for (String date : dateArray) {
|
||||
if (!expectedDates.contains(date)) {
|
||||
allEqual = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue(allEqual);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
// @Disabled
|
||||
public void testTopOfPage13InNotHeader() throws IOException {
|
||||
|
||||
@ -0,0 +1,66 @@
|
||||
package com.iqser.red.service.redaction.v1.server.date;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
||||
|
||||
public class DateConverterTest {
|
||||
|
||||
@Test
|
||||
public void testDateConverter() {
|
||||
|
||||
List<String> goldenStandardDates = Arrays.asList("3 Jun 08",
|
||||
"09. Apr 09",
|
||||
"07-Sep-2010",
|
||||
"26-FEB-2008",
|
||||
"30-APR-2008",
|
||||
"30-apr-2008",
|
||||
"30-Apr-2008",
|
||||
"1 Apr 08",
|
||||
"26-FEB-2008",
|
||||
"19-MAR-2008",
|
||||
"1 Apr 08",
|
||||
"27-MAR-2008",
|
||||
"06-MAY-2008",
|
||||
"3 Apr 08",
|
||||
"12-MAR-2008",
|
||||
"08-APR-2008",
|
||||
"1 Apr 08",
|
||||
"4 Apr 08",
|
||||
"13 November 2017 (animal 1)",
|
||||
"16 November 2017 (animal 1)",
|
||||
"27 March 2018 (animal 1 - 5000 mg/kg bw)",
|
||||
"10 April 2018 (animal 1 - 5000 mg/kg bw)",
|
||||
"13 November 2017 (animal 1)",
|
||||
"16 November 2017 (animal 1)",
|
||||
"28 March 2018 (animal 1 - 5000 mg/kg bw)",
|
||||
"28 March 2018 (animal1 - 5000 mg/kg bw)",
|
||||
"28 August 2018 (animal 1)",
|
||||
"October, 27 1989",
|
||||
"October, 7 1989",
|
||||
"August 21,1998",
|
||||
"August 2,1998",
|
||||
"1st May 1988",
|
||||
"2nd June 2003",
|
||||
"3rd September 2005",
|
||||
"6th September 2005",
|
||||
"17th August 2005",
|
||||
"22nd August 2035",
|
||||
"12-January-2023",
|
||||
"2-January-2023");
|
||||
|
||||
|
||||
|
||||
for (String dateStr : goldenStandardDates) {
|
||||
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
|
||||
assertTrue(parsedDate.isPresent(), "Failed to parse date: " + dateStr);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,28 @@
|
||||
package com.iqser.red.service.redaction.v1.server.date;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.utils.DateExtractorNatty;
|
||||
|
||||
public class DateExtractorNattyTest {
|
||||
|
||||
@Test
|
||||
public void testExtractDate() {
|
||||
|
||||
String[] testStrings = {"13 November 2017 (animal 1)", "16 November 2017 (animal 1)", "27 March 2018 (animal 1 - 5000 mg/kg bw)", "10 April 2018 (animal 1 - 5000 mg/kg bw)", "13 November 2017 (animal 1)", "16 November 2017 (animal 1)", "28 March 2018 (animal 1 - 5000 mg/kg bw)", "28 March 2018 (animal1 - 5000 mg/kg bw)", "28 August 2018 (animal 1)", "31 August 2018 (animal 1)"};
|
||||
|
||||
String[] expectedDates = {"13 November 2017", "16 November 2017", "27 March 2018", "10 April 2018", "13 November 2017", "16 November 2017", "28 March 2018", "28 March 2018", "28 August 2018", "31 August 2018"};
|
||||
|
||||
for (int i = 0; i < testStrings.length; i++) {
|
||||
Optional<String> extractedDate = DateExtractorNatty.extractDate(testStrings[i]);
|
||||
assertTrue(extractedDate.isPresent(), "No date found in: " + testStrings[i]);
|
||||
assertEquals(expectedDates[i], extractedDate.get(), "Failed to extract correct date from: " + testStrings[i]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user