RED-9541 - Add extra CSV validation

This commit is contained in:
Andrei Isvoran 2024-09-10 09:36:38 +03:00
parent f0bdfe3bce
commit 1ca8c08f47
4 changed files with 98 additions and 3 deletions

View File

@ -150,16 +150,46 @@ public class FileAttributesManagementService {
try (CSVReader csvReader = new CSVReaderBuilder(new InputStreamReader(new ByteArrayInputStream(csvFileBytes), charset)).withCSVParser(new CSVParserBuilder().withSeparator(
delimiterChar).build()).build()) {
while (csvReader.readNext() != null) {
// Intentionally empty: reading lines for validation purposes
String[] nextLine;
int lineNumber = 0;
int expectedColumnCount = -1;
while ((nextLine = csvReader.readNext()) != null) {
lineNumber++;
if (expectedColumnCount == -1) {
expectedColumnCount = nextLine.length;
} else if (nextLine.length != expectedColumnCount) {
throw new BadRequestException("Invalid CSV file format at line " + lineNumber + ": Expected " + expectedColumnCount + " columns but found " + nextLine.length);
}
for (String field : nextLine) {
if (field != null && !field.isEmpty()) {
validateQuotesInField(field, lineNumber);
}
}
}
} catch (IOException | CsvValidationException e) {
} catch (Exception e) {
throw new BadRequestException("Invalid CSV file format: " + e.getMessage(), e);
}
}
/**
 * Rejects a CSV field value that opens with a quotation mark but does not close with one.
 *
 * <p>Only the leading-quote case is checked: a value that merely ends with a quotation mark,
 * or that both starts and ends with one, is accepted.
 *
 * @param field      the field value to inspect; must not be {@code null}
 * @param lineNumber counter of the record being validated, reported in the error message
 * @throws BadRequestException if {@code field} starts with {@code "} but does not end with {@code "}
 */
private void validateQuotesInField(String field, int lineNumber) {

    if (field.startsWith("\"") && !field.endsWith("\"")) {
        // Message previously contained a stray period ("quotation marks. in field").
        throw new BadRequestException("Invalid CSV format at line " + lineNumber + ": Unmatched quotation marks in field '" + field + "'");
    }
}
@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE")
private List<List<String>> getCsvRecords(byte[] csv, String delimiter, String encoding) {

View File

@ -311,4 +311,65 @@ public class FileAttributeTest extends AbstractPersistenceServerServiceTest {
assertEquals(400, exception.status());
}
/**
 * Uploads two malformed CSV fixtures and expects each upload to fail with a
 * {@code FeignException} carrying the corresponding validation message:
 * one fixture with a missing comma (column-count mismatch on line 2) and one
 * with a missing quotation mark (unterminated quoted field).
 */
@Test
@SneakyThrows
public void testUploadCSV() {

    var dossier = dossierTesterAndProvider.provideTestDossier();
    var templateId = dossier.getDossierTemplateId();

    // Template-wide CSV settings: comma separated, UTF-8, files matched via the "Path" column.
    FileAttributesConfig csvSettings = new FileAttributesConfig();
    csvSettings.setDelimiter(",");
    csvSettings.setEncoding("UTF-8");
    csvSettings.setFilenameMappingColumnHeaderName("Path");
    fileAttributeConfigClient.setFileAttributesConfig(templateId, csvSettings);

    // Register the attribute columns in the same order as before.
    fileAttributeConfigClient.addOrUpdateFileAttribute(templateId, csvColumnAttribute("Vertebrate Study", FileAttributeType.TEXT));
    fileAttributeConfigClient.addOrUpdateFileAttribute(templateId, csvColumnAttribute("Minor Version Number", FileAttributeType.DATE));
    fileAttributeConfigClient.addOrUpdateFileAttribute(templateId, csvColumnAttribute("Major Version Number", FileAttributeType.NUMBER));

    // Case 1: a missing comma leaves line 2 with fewer columns than the header.
    byte[] missingCommaBytes = IOUtils.toByteArray(new ClassPathResource("files/csv/fileattributes_missing_comma.csv").getInputStream());
    MockMultipartFile missingCommaUpload = new MockMultipartFile("file.csv", "fileattributes_missing_comma.csv", "application/csv", missingCommaBytes);
    FeignException missingCommaFailure = assertThrows(FeignException.class, () -> uploadClient.upload(missingCommaUpload, dossier.getId(), false, false));
    assertTrue(missingCommaFailure.getMessage().contains("Invalid CSV file format: Invalid CSV file format at line 2: Expected 5 columns but found 4"));

    // Case 2: a missing closing quotation mark leaves a quoted field unterminated.
    byte[] missingQuotationBytes = IOUtils.toByteArray(new ClassPathResource("files/csv/fileattributes_missing_quotation_mark.csv").getInputStream());
    MockMultipartFile missingQuotationUpload = new MockMultipartFile("file.csv", "fileattributes_missing_quotation_mark.csv", "application/csv", missingQuotationBytes);
    FeignException missingQuotationFailure = assertThrows(FeignException.class, () -> uploadClient.upload(missingQuotationUpload, dossier.getId(), false, false));
    assertTrue(missingQuotationFailure.getMessage()
            .contains("Invalid CSV file format: Unterminated quoted field at end of CSV line. Beginning of lost text: [4.636.0,4.363.0,4.363.0\\n]"));
}

/**
 * Builds a non-primary, filterable attribute that is displayed in the file list and whose
 * label and CSV column header are both {@code header}.
 */
private static FileAttributeConfig csvColumnAttribute(String header, FileAttributeType type) {

    FileAttributeConfig attribute = new FileAttributeConfig();
    attribute.setPrimaryAttribute(false);
    attribute.setLabel(header);
    attribute.setCsvColumnHeader(header);
    attribute.setType(type);
    attribute.setFilterable(true);
    attribute.setDisplayedInFileList(true);
    return attribute;
}
}

View File

@ -0,0 +1,2 @@
Path,"Document Title","Major Version Number","Minor Version Number","Vertebrate Study"
"402Study.pdf","My Title","4.636.0","4.363.0""4.363.0"
Can't render this file because it has a wrong number of fields in line 2.

View File

@ -0,0 +1,2 @@
Path,"Document Title","Major Version Number","Minor Version Number","Vertebrate Study"
"402Study.pdf","My Title","4.636.0,"4.363.0","4.363.0"
Can't render this file because it contains an unexpected character in line 2 and column 36.