RED-10443 - 500 Error occurs when selecting ISO-8859-1 as Encoding Type for any CSV File Format

- accept only ISO-8859-1 as the ISO encoding name. A meaningful message (HTTP 400) is returned in case of a bad encoding
- update unit tests and add unit test
This commit is contained in:
corinaolariu 2024-11-22 11:32:18 +02:00
parent 3be9566a2e
commit e5ea667ea1
8 changed files with 67 additions and 35 deletions

View File

@ -44,7 +44,11 @@ public interface FileAttributesResource {
String FILE_ID = "fileId";
String FILE_ID_PATH_VARIABLE = "/{" + FILE_ID + "}";
Set<String> encodingList = Sets.newHashSet("ISO", "ASCII", "UTF-8");
String UTF_ENCODING = "UTF-8";
String ASCII_ENCODING = "ASCII";
String ISO_ENCODING = "ISO-8859-1";
Set<String> encodingList = Sets.newHashSet(ISO_ENCODING, ASCII_ENCODING, UTF_ENCODING);
@ResponseBody

View File

@ -24,6 +24,7 @@ import com.iqser.red.service.persistence.management.v1.processor.mapper.Componen
import com.iqser.red.service.persistence.management.v1.processor.model.ComponentMapping;
import com.iqser.red.service.persistence.management.v1.processor.model.ComponentMappingDownloadModel;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierTemplatePersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.utils.StringEncodingUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
@ -107,7 +108,7 @@ public class ComponentMappingService {
String fileName,
char quoteChar) {
Charset charset = resolveCharset(encoding);
Charset charset = StringEncodingUtils.resolveCharset(encoding);
CsvStats stats = sortCSVFile(delimiter, mappingFile, charset, quoteChar);
@ -126,20 +127,6 @@ public class ComponentMappingService {
}
/**
 * Looks up the {@link Charset} registered under the given name and reports every
 * failure as a {@link BadRequestException}.
 *
 * @param encoding charset name to look up
 * @return the charset registered under {@code encoding}
 * @throws BadRequestException if {@code encoding} is null, is not a legal charset name,
 *                             or names a charset that is not available
 */
private static Charset resolveCharset(String encoding) {
    // Charset.forName signals a null name with a plain IllegalArgumentException;
    // checking up front produces the same error without relying on that exception.
    if (encoding == null) {
        throw new BadRequestException("Encoding can't be null.");
    }
    try {
        return Charset.forName(encoding);
    } catch (IllegalCharsetNameException illegalName) {
        throw new BadRequestException("Invalid character encoding: " + encoding);
    } catch (UnsupportedCharsetException unsupported) {
        throw new BadRequestException("Unsupported character encoding: " + encoding);
    }
}
private static CsvStats sortCSVFile(char delimiter, File mappingFile, Charset charset, char quoteChar) throws BadRequestException, IOException {
Path tempFile = Files.createTempFile("mapping", ".tmp");

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.ASCII_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.ISO_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileAttributeTypeFormats.FILE_ATTRIBUTE_TYPE_DATE_FORMAT;
import static com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileAttributeTypeFormats.FILE_ATTRIBUTE_TYPE_NUMBER_REGEX;
@ -32,6 +34,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Confl
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileAttributeConfigPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileStatusPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.utils.StringEncodingUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.ImportCsvRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.ImportCsvResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileAttributeType;
@ -59,10 +62,6 @@ public class FileAttributesManagementService {
private final DossierPersistenceService dossierPersistenceService;
private final IndexingService indexingService;
public static String UTF_ENCODING = "UTF-8";
public static String ASCII_ENCODING = "ASCII";
public static String ISO_ENCODING = "ISO";
@Transactional
public ImportCsvResponse importCsv(String dossierId, ImportCsvRequest importCsvRequest) {
@ -144,7 +143,7 @@ public class FileAttributesManagementService {
throw new IllegalArgumentException("Delimiter must be a single character.");
}
char delimiterChar = delimiter.charAt(0);
Charset charset = Charset.forName(encoding);
Charset charset = StringEncodingUtils.resolveCharset(encoding);
try (CSVReader csvReader = new CSVReaderBuilder(new InputStreamReader(new ByteArrayInputStream(csvFileBytes), charset)).withCSVParser(new CSVParserBuilder().withSeparator(
delimiterChar).build()).build()) {
@ -214,7 +213,7 @@ public class FileAttributesManagementService {
if (ASCII_ENCODING.equalsIgnoreCase(encoding) || StandardCharsets.US_ASCII.name().equalsIgnoreCase(encoding)) {
return StandardCharsets.US_ASCII;
}
// accept both "ISO" (non-unique name) and the actual name "US-ASCII" of the charset
// accept only the canonical charset name "ISO-8859-1" (compared case-insensitively)
if (ISO_ENCODING.equalsIgnoreCase(encoding) || StandardCharsets.ISO_8859_1.name().equalsIgnoreCase(encoding)) {
return StandardCharsets.ISO_8859_1;
}

View File

@ -1,8 +1,8 @@
package com.iqser.red.service.persistence.management.v1.processor.service.persistence;
import static com.iqser.red.service.persistence.management.v1.processor.service.FileAttributesManagementService.ASCII_ENCODING;
import static com.iqser.red.service.persistence.management.v1.processor.service.FileAttributesManagementService.ISO_ENCODING;
import static com.iqser.red.service.persistence.management.v1.processor.service.FileAttributesManagementService.UTF_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.ASCII_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.ISO_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.UTF_ENCODING;
import java.util.List;
import java.util.Objects;

View File

@ -1,10 +1,15 @@
package com.iqser.red.service.persistence.management.v1.processor.utils;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import lombok.experimental.UtilityClass;
@UtilityClass
@ -30,4 +35,17 @@ public final class StringEncodingUtils {
return result.toString();
}
/**
 * Resolves a charset name to a {@link Charset}, converting lookup failures into
 * {@link BadRequestException}s so callers get a descriptive error instead of a raw
 * charset exception.
 *
 * @param encoding name of the charset to resolve
 * @return the {@link Charset} returned by {@link Charset#forName(String)}
 * @throws BadRequestException if {@code encoding} is null, syntactically illegal as a
 *                             charset name, or not supported
 */
public static Charset resolveCharset(String encoding) {
    // A null name is the only case in which Charset.forName throws a bare
    // IllegalArgumentException, so it is handled as an explicit guard.
    if (encoding == null) {
        throw new BadRequestException("Encoding can't be null.");
    }

    final Charset resolved;
    try {
        resolved = Charset.forName(encoding);
    } catch (IllegalCharsetNameException badName) {
        throw new BadRequestException("Invalid character encoding: " + encoding);
    } catch (UnsupportedCharsetException notAvailable) {
        throw new BadRequestException("Unsupported character encoding: " + encoding);
    }
    return resolved;
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.UTF_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -56,7 +57,7 @@ public class ComponentMappingTest extends AbstractPersistenceServerServiceTest {
ComponentMappingMetadataModel componentMappingMetadataModel = dossierTemplateExternalClient.uploadMapping(dossierTemplate.getId(),
mockMultipartFile,
"file",
"UTF-8",
UTF_ENCODING,
",",
"\"");
@ -81,7 +82,7 @@ public class ComponentMappingTest extends AbstractPersistenceServerServiceTest {
componentMappingMetadataModel.getId(),
updateMockMultipartFile,
"file",
"UTF-8",
UTF_ENCODING,
",",
"\"");
@ -101,7 +102,7 @@ public class ComponentMappingTest extends AbstractPersistenceServerServiceTest {
IOUtils.toByteArray(new ClassPathResource("files/componentmapping/empty.csv").getInputStream()));
String id = dossierTemplate.getId();
var result = assertThrows(FeignException.class, () -> dossierTemplateExternalClient.uploadMapping(id, mockMultipartFile, "file", "UTF-8", ",", "\""));
var result = assertThrows(FeignException.class, () -> dossierTemplateExternalClient.uploadMapping(id, mockMultipartFile, "file", UTF_ENCODING, ",", "\""));
assertTrue(result.getMessage().contains("CSV file can not be empty!"));
}

View File

@ -1,7 +1,8 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static com.iqser.red.service.persistence.management.v1.processor.service.FileAttributesManagementService.ASCII_ENCODING;
import static com.iqser.red.service.persistence.management.v1.processor.service.FileAttributesManagementService.UTF_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.ASCII_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.ISO_ENCODING;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.UTF_ENCODING;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
@ -73,7 +74,7 @@ public class FileAttributeTest extends AbstractPersistenceServerServiceTest {
var generalConfig = new FileAttributesConfig();
generalConfig.setDelimiter(",");
generalConfig.setEncoding("UTF-8");
generalConfig.setEncoding(UTF_ENCODING);
generalConfig.setFilenameMappingColumnHeaderName("Name");
fileAttributeConfigClient.setFileAttributesConfig(dossier.getDossierTemplateId(), generalConfig);
@ -196,7 +197,7 @@ public class FileAttributeTest extends AbstractPersistenceServerServiceTest {
var generalConfig = new FileAttributesConfig();
generalConfig.setDelimiter(",");
generalConfig.setEncoding("UTF-8");
generalConfig.setEncoding(UTF_ENCODING);
generalConfig.setFilenameMappingColumnHeaderName("Name");
fileAttributeConfigClient.setFileAttributesConfig(dossier.getDossierTemplateId(), generalConfig);
@ -248,7 +249,7 @@ public class FileAttributeTest extends AbstractPersistenceServerServiceTest {
var generalConfig = new FileAttributesConfig();
generalConfig.setDelimiter(",");
generalConfig.setEncoding("UTF-8");
generalConfig.setEncoding(UTF_ENCODING);
generalConfig.setFilenameMappingColumnHeaderName("Name");
fileAttributeConfigClient.setFileAttributesConfig(dossier.getDossierTemplateId(), generalConfig);
@ -324,7 +325,7 @@ public class FileAttributeTest extends AbstractPersistenceServerServiceTest {
var generalConfig = new FileAttributesConfig();
generalConfig.setDelimiter(",");
generalConfig.setEncoding("UTF-8");
generalConfig.setEncoding(UTF_ENCODING);
generalConfig.setFilenameMappingColumnHeaderName("Path");
fileAttributeConfigClient.setFileAttributesConfig(dossier.getDossierTemplateId(), generalConfig);
@ -374,4 +375,25 @@ public class FileAttributeTest extends AbstractPersistenceServerServiceTest {
assertTrue(result.getMessage().contains("Invalid CSV file format: Unterminated quoted field at end of CSV line. Beginning of lost text: [4.636.0,4.363.0,4.363.0\\n]") || result.getMessage().contains("Invalid CSV file format: Unterminiertes Anführungszeichen am Ende einer CSV-Zeile. Anfang des verlorenen Textes: [4.636.0,4.363.0,4.363.0\\n]"));
}
/**
 * Exercises encoding handling when saving the file attributes configuration:
 * saving with the abbreviated name "ISO" must fail with HTTP status 400, while saving
 * with {@code ISO_ENCODING} must succeed and be returned unchanged when loaded again.
 */
@Test
public void testParsingEncoding() {

    var testDossier = dossierTesterAndProvider.provideTestDossier();
    var templateId = testDossier.getDossierTemplateId();

    var config = new FileAttributesConfig();
    config.setDelimiter(",");
    config.setEncoding("ISO");
    config.setFilenameMappingColumnHeaderName("Name");

    // First attempt: the abbreviated charset name must produce a 400.
    var rejection = assertThrows(FeignException.class, () -> fileAttributeConfigClient.setFileAttributesConfig(templateId, config));
    assertEquals(400, rejection.status());

    // Second attempt: same configuration, now with the full charset name.
    config.setEncoding(ISO_ENCODING);
    fileAttributeConfigClient.setFileAttributesConfig(templateId, config);

    var storedConfig = fileAttributeConfigClient.getFileAttributesConfiguration(templateId);
    assertThat(storedConfig.getEncoding()).isEqualTo(ISO_ENCODING);
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static com.iqser.red.service.persistence.service.v1.api.external.resource.FileAttributesResource.UTF_ENCODING;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@ -765,7 +766,7 @@ public class FileTest extends AbstractPersistenceServerServiceTest {
fileManagementStorageService.storeObject(dossier.getId(), fileId, FileType.UNTOUCHED, new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8)));
when(fileAttributeConfigPersistenceService.getFileAttributesGeneralConfiguration(anyString())).thenReturn(FileAttributesGeneralConfigurationEntity.builder()
.delimiter(",")
.encoding("UTF-8")
.encoding(UTF_ENCODING)
.build());
when(fileAttributeConfigPersistenceService.getFileAttributes(anyString())).thenReturn(Collections.emptyList());
assertThrows(FeignException.class, () -> uploadClient.upload(malformedCsvFile, dossier.getId(), false, false));