RED-9964: fix errors with images
This commit is contained in:
parent
895bc56590
commit
cd2bda15aa
@ -56,6 +56,7 @@ public class Page {
|
||||
public TextBlock getMainBodyTextBlock() {
|
||||
|
||||
return textBlocksOnPage.stream()
|
||||
.filter(atb -> !atb.isEmpty())
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -86,6 +86,7 @@ public class DocumentGraphMapper {
|
||||
switch (entryData.getType()) {
|
||||
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
||||
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
||||
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
|
||||
default -> textBlock.getAtomicTextBlocks()
|
||||
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
|
||||
}
|
||||
|
||||
@ -19,11 +19,13 @@ import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@ -60,7 +62,6 @@ import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.websocket.RedisSyncedWebSocketService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.keycloakcommons.security.TenantAuthenticationManagerResolver;
|
||||
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
|
||||
@ -85,7 +86,25 @@ import lombok.extern.slf4j.Slf4j;
|
||||
* This way you can recreate what is happening on the stack almost exactly.
|
||||
*/ public class AnalysisEnd2EndTest {
|
||||
|
||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/business-logic/documine/cpglobal/Flora SCM (Do Not Edit)"); // Add your dossier-template here
|
||||
// These files will be uploaded if they are present in the folder
|
||||
public static final Set<FileType> ENDINGS_TO_UPLOAD = Set.of(FileType.ORIGIN,
|
||||
FileType.DOCUMENT_PAGES,
|
||||
FileType.DOCUMENT_POSITION,
|
||||
FileType.DOCUMENT_STRUCTURE,
|
||||
FileType.DOCUMENT_TEXT,
|
||||
FileType.IMAGE_INFO,
|
||||
FileType.NER_ENTITIES,
|
||||
FileType.TABLES,
|
||||
FileType.IMPORTED_REDACTIONS);
|
||||
|
||||
// These files must be present in the folder or the test will skip the file
|
||||
public static final Set<FileType> REQUIRED_FILES = Set.of(FileType.ORIGIN,
|
||||
FileType.DOCUMENT_PAGES,
|
||||
FileType.DOCUMENT_POSITION,
|
||||
FileType.DOCUMENT_STRUCTURE,
|
||||
FileType.DOCUMENT_TEXT);
|
||||
|
||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/mainBodyFailed/DOSSIER_TEMPLATE"); // Add your dossier-template here
|
||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||
final String TENANT_ID = "tenant";
|
||||
TestDossierTemplate testDossierTemplate;
|
||||
@ -124,7 +143,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@SneakyThrows
|
||||
public void runAnalysisEnd2End() {
|
||||
|
||||
String folder = "/home/kschuettler/Dokumente/Ticket Related/RED-9964/17a25133-e098-4610-b553-d1bf11a56d96/560e6ab1ab4754b9a62fd2e6d4d71327"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||
String folder = "/home/kschuettler/Downloads/mainBodyFailed/728d0af4-f4c4-4bc9-acf8-7d2632b02962/"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||
|
||||
Path absoluteFolderPath;
|
||||
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
|
||||
@ -136,11 +155,14 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
log.info("Starting end2end analyses for all distinct filenames in folder: {}", folder);
|
||||
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
|
||||
log.info("Found {} distinct fileIds", analyzeRequests.size());
|
||||
log.info("Found {} distinct fileIds with all required files", analyzeRequests.size());
|
||||
for (int i = 0; i < analyzeRequests.size(); i++) {
|
||||
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
|
||||
log.info("----------------------------------------------------------------------------------");
|
||||
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
|
||||
analyzeService.analyze(analyzeRequest);
|
||||
log.info("----------------------------------------------------------------------------------");
|
||||
log.info("");
|
||||
}
|
||||
}
|
||||
|
||||
@ -191,22 +213,36 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@SneakyThrows
|
||||
private List<AnalyzeRequest> prepareStorageForFolder(Path folder) {
|
||||
|
||||
return Files.list(folder)
|
||||
.map(this::parseFileId)
|
||||
.distinct()
|
||||
return findOriginFiles(folder).stream()
|
||||
.map(fileId -> prepareStorageForFile(fileId, folder))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private String parseFileId(Path path) {
|
||||
private Set<String> findOriginFiles(Path folder) throws IOException {
|
||||
|
||||
return path.getFileName().toString().split("\\.")[0];
|
||||
return Files.walk(folder)
|
||||
.map(this::parseFileName)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
private String parseFileName(Path path) {
|
||||
|
||||
String suffix = ".ORIGIN.pdf";
|
||||
if (!path.getFileName().toString().endsWith(suffix)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return path.getFileName().toString().replace(suffix, "");
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorageForFile(String fileId, Path folder) {
|
||||
private Optional<AnalyzeRequest> prepareStorageForFile(String fileName, Path folder) {
|
||||
|
||||
AnalyzeRequest request = new AnalyzeRequest();
|
||||
request.setDossierId(UUID.randomUUID().toString());
|
||||
@ -214,45 +250,38 @@ import lombok.extern.slf4j.Slf4j;
|
||||
request.setDossierTemplateId(testDossierTemplate.id);
|
||||
request.setAnalysisNumber(-1);
|
||||
|
||||
Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json");
|
||||
Path manualRedactionFile = folder.resolve(fileName + ".MANUAL_REDACTIONS.json");
|
||||
if (Files.exists(manualRedactionFile)) {
|
||||
request.setManualRedactions(parseManualRedactions(manualRedactionFile));
|
||||
} else {
|
||||
request.setManualRedactions(new ManualRedactions());
|
||||
}
|
||||
|
||||
Set<FileType> endingsToUpload = Set.of("ORIGIN",
|
||||
"DOCUMENT_PAGES",
|
||||
"DOCUMENT_POSITION",
|
||||
"DOCUMENT_STRUCTURE",
|
||||
"DOCUMENT_TEXT",
|
||||
"IMAGE_INFO",
|
||||
"NER_ENTITIES",
|
||||
"TABLES",
|
||||
"IMPORTED_REDACTIONS")
|
||||
.stream()
|
||||
.map(FileType::valueOf)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
Set<FileType> uploadedFileTypes = Files.walk(folder)
|
||||
.filter(path -> path.toFile().isFile())
|
||||
.filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains)
|
||||
.orElse(false))
|
||||
.map(filePath -> uploadFile(filePath, request))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
Set<FileType> uploadedFileTypes = findFilesToUpload(fileName, folder, ENDINGS_TO_UPLOAD).map(filePath -> uploadFile(filePath, request))
|
||||
.map(FileToUpload::fileType)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
|
||||
Set<FileType> missingFileTypes = Sets.difference(REQUIRED_FILES, uploadedFileTypes);
|
||||
|
||||
if (!missingFileTypes.isEmpty()) {
|
||||
log.error("Folder {} is missing files of type {}",
|
||||
folder.toFile(),
|
||||
missingFileTypes.stream()
|
||||
.map(Enum::toString)
|
||||
.collect(Collectors.joining(", ")));
|
||||
throw new NotFoundException("Not all required file types are present.");
|
||||
return Optional.empty();
|
||||
}
|
||||
return request;
|
||||
return Optional.of(request);
|
||||
}
|
||||
|
||||
|
||||
private static Stream<FileToUpload> findFilesToUpload(String fileName, Path folder, Set<FileType> endingsToUpload) throws IOException {
|
||||
|
||||
return Files.walk(folder)
|
||||
.filter(path -> path.toFile().isFile())
|
||||
.map(path -> parseFileTypeFromPath(path, fileName, endingsToUpload))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -267,11 +296,19 @@ import lombok.extern.slf4j.Slf4j;
|
||||
}
|
||||
|
||||
|
||||
private static Optional<FileType> parseFileTypeFromPath(Path path) {
|
||||
private static Optional<FileToUpload> parseFileTypeFromPath(Path path, String fileName, Set<FileType> endingsToUpload) {
|
||||
|
||||
if (!path.getFileName().toString().startsWith(fileName)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
String fileType = path.getFileName().toString().split("\\.")[1];
|
||||
try {
|
||||
return Optional.of(FileType.valueOf(fileType));
|
||||
String fileTypeString = path.getFileName().toString().split("\\.")[1];
|
||||
FileType fileType = FileType.valueOf(fileTypeString);
|
||||
if (!endingsToUpload.contains(fileType)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(new FileToUpload(path, fileType));
|
||||
} catch (IllegalArgumentException e) {
|
||||
return Optional.empty();
|
||||
}
|
||||
@ -279,26 +316,26 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Optional<FileType> uploadFile(Path path, AnalyzeRequest request) {
|
||||
private FileToUpload uploadFile(FileToUpload fileToUpload, AnalyzeRequest request) {
|
||||
|
||||
Optional<FileType> fileType = parseFileTypeFromPath(path);
|
||||
if (fileType.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
if (path.getFileName().toString().endsWith(".gz")) {
|
||||
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) {
|
||||
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);
|
||||
if (fileToUpload.path().getFileName().toString().endsWith(".gz")) {
|
||||
try (var fis = new FileInputStream(fileToUpload.path().toFile()); var in = new GZIPInputStream(fis);) {
|
||||
storageService.storeObject(TENANT_ID,
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
|
||||
in);
|
||||
}
|
||||
} else {
|
||||
try (var in = new FileInputStream(path.toFile())) {
|
||||
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);
|
||||
try (var in = new FileInputStream(fileToUpload.path().toFile())) {
|
||||
storageService.storeObject(TENANT_ID,
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
|
||||
in);
|
||||
}
|
||||
}
|
||||
return fileType;
|
||||
return fileToUpload;
|
||||
}
|
||||
|
||||
|
||||
private class TestDossierTemplate {
|
||||
public class TestDossierTemplate {
|
||||
|
||||
String id;
|
||||
Dictionary testDictionary;
|
||||
@ -398,4 +435,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
}
|
||||
|
||||
private record FileToUpload(Path path, FileType fileType) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user