RED-6009: Document Tree Structure

* Added a dead letter queue
* Moved the queue names to the internal API module
This commit is contained in:
Kilian Schuettler 2023-04-12 17:59:22 +02:00
parent d5635e9660
commit c06e34b6d2
15 changed files with 118 additions and 168 deletions

View File

@ -0,0 +1,10 @@
package com.knecon.fforesight.service.layoutparser.internal.api.queue;
import java.util.Map;
import lombok.Builder;
/**
 * Event describing the outcome of one processed layout parsing request.
 *
 * @param identifier    identifier map taken over from the originating request
 * @param duration      processing time in milliseconds
 * @param numberOfPages number of pages of the parsed origin document
 * @param message       human-readable summary of the result
 */
@Builder
public record LayoutParsingFinishedEvent(
        Map<String, String> identifier,
        long duration,
        int numberOfPages,
        String message) {
}

View File

@ -0,0 +1,8 @@
package com.knecon.fforesight.service.layoutparser.internal.api.queue;
/**
 * Names of the message queues used by the layout parsing service.
 *
 * <p>This is a pure constants holder: it is final and cannot be instantiated.
 */
public final class LayoutParsingQueueNames {

    /** Queue from which layout parsing requests are consumed. */
    public static final String LAYOUT_PARSING_REQUEST_QUEUE = "LAYOUTPARSING_REQUEST_QUEUE";

    /** Dead letter queue, used as dead-letter routing key by the other queues. */
    public static final String LAYOUT_PARSING_DLQ = "LAYOUTPARSING_DLQ";

    /** Queue to which finished events are published after parsing. */
    public static final String LAYOUT_PARSING_FINISHED_EVENT_QUEUE = "LAYOUTPARSING_FINISHED_EVENT_QUEUE";

    private LayoutParsingQueueNames() {
        // Constants only; no instances.
    }
}

View File

@ -0,0 +1,20 @@
package com.knecon.fforesight.service.layoutparser.internal.api.queue;
import java.util.Map;
import java.util.Optional;
import lombok.Builder;
/**
 * Request to parse the layout of one document and store the results.
 *
 * @param identifier                 identifier map; copied into the finished event
 * @param originFileStorageId        storage id of the origin PDF file to parse
 * @param tablesFileStorageId        optional storage id of the table service result; never {@code null}
 * @param imagesFileStorageId        optional storage id of the image service result; never {@code null}
 * @param structureFileStorageId     storage id under which the table of contents is stored
 * @param textBlockFileStorageId     storage id under which the atomic text blocks are stored
 * @param positionBlockFileStorageId storage id under which the atomic position blocks are stored
 * @param pageFileStorageId          storage id under which the page data is stored
 */
@Builder
public record LayoutParsingRequest(
        Map<String, String> identifier,
        String originFileStorageId,
        Optional<String> tablesFileStorageId,
        Optional<String> imagesFileStorageId,
        String structureFileStorageId,
        String textBlockFileStorageId,
        String positionBlockFileStorageId,
        String pageFileStorageId) {

    /**
     * Normalizes the optional components so that accessors never return {@code null}.
     */
    public LayoutParsingRequest {
        // A builder (or deserializer) that never sets an optional component passes null here;
        // callers invoke isPresent() on the accessor unconditionally, which would otherwise throw.
        if (tablesFileStorageId == null) {
            tablesFileStorageId = Optional.empty();
        }
        if (imagesFileStorageId == null) {
            imagesFileStorageId = Optional.empty();
        }
    }
}

View File

@ -18,8 +18,8 @@ import com.knecon.fforesight.service.layoutparser.processor.classification.servi
import com.knecon.fforesight.service.layoutparser.processor.classification.service.PdfParsingService;
import com.knecon.fforesight.service.layoutparser.processor.classification.service.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -38,61 +38,36 @@ public class LayoutParsingService {
private final DocumentGraphFactory documentGraphFactory;
/**
 * Loads the origin PDF and, if requested, the image and table service results, parses the layout
 * into a document graph, stores the resulting document data and returns a finished event.
 *
 * NOTE: this hunk shows the removed and the added variant of the method interleaved. The variant
 * using getXxx() accessors catches IOException and returns an event with a status; the variant
 * using record accessors declares {@code throws IOException} and lets it propagate.
 *
 * @param layoutParsingRequest storage ids of the input files and of the files to write
 * @return event carrying identifier, page count, duration and a summary message
 */
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) {
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
PDDocument originDocument;
try {
originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.getOriginFileStorageId());
} catch (IOException e) {
log.error(e.toString());
// NOTE(review): the message formats the page file storage id, but the file that failed to load
// was requested with the origin file storage id.
return LayoutParsingFinishedEvent.builder()
.status(400)
.message(format("Origin PDF File with id %s could not be loaded!", layoutParsingRequest.getPageFileStorageId()))
.build();
}
// Start of the time span reported as duration in the finished event.
long start = System.currentTimeMillis();
PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
// Empty response is used when no images file id is present.
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
if (layoutParsingRequest.getImagesFileStorageId().isPresent()) {
try {
// NOTE(review): the guard checks the images file id, but the page file id is passed to
// getImagesFile — looks like the wrong id; confirm against LayoutParsingStorageService.
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.getPageFileStorageId());
} catch (IOException e) {
log.error(e.toString());
return LayoutParsingFinishedEvent.builder()
.status(400)
.message(format("Image Service File with id %s could not be loaded!", layoutParsingRequest.getImagesFileStorageId()))
.build();
}
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
// NOTE(review): same as above — page file id is passed although the images file id was checked.
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId());
}
// Empty response is used when no tables file id is present.
TableServiceResponse tableServiceResponse = new TableServiceResponse();
if (layoutParsingRequest.getTablesFileStorageId().isPresent()) {
try {
// NOTE(review): the guard checks the tables file id, but the page file id is passed to
// getTablesFile — looks like the wrong id; confirm against LayoutParsingStorageService.
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.getPageFileStorageId());
} catch (IOException e) {
log.error(e.toString());
return LayoutParsingFinishedEvent.builder()
.status(400)
.message(format("CV Table Parsing File with id %s could not be loaded!", layoutParsingRequest.getPageFileStorageId()))
.build();
}
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
// NOTE(review): same as above — page file id is passed although the tables file id was checked.
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId());
}
DocumentGraph documentGraph = parseLayout(originDocument, imageServiceResponse, tableServiceResponse);
// Page count is read before the document is closed.
int numberOfPages = originDocument.getNumberOfPages();
// NOTE(review): close() is only reached on the success path; if any call above throws, the
// document is not closed here. Consider try-with-resources.
originDocument.close();
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
try {
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
} catch (IOException e) {
log.error("Parsed Document files could not be saved!");
log.error(e.getMessage());
return LayoutParsingFinishedEvent.builder().status(500).message("Files could not be saved").build();
}
// Success event: echoes the request identifier and lists the ids the results were stored under.
return LayoutParsingFinishedEvent.builder()
.status(200)
.identifier(layoutParsingRequest.identifier())
.numberOfPages(numberOfPages)
.duration(System.currentTimeMillis() - start)
.message(format("Layout parsing is finished and files have been saved with Ids:\n Structure: %s\nText: %s\nPositions: %s\nPageData: %s",
layoutParsingRequest.getStructureFileStorageId(),
layoutParsingRequest.getTextBlockFileStorageId(),
layoutParsingRequest.getPositionBlockFileStorageId(),
layoutParsingRequest.getPageFileStorageId()))
layoutParsingRequest.structureFileStorageId(),
layoutParsingRequest.textBlockFileStorageId(),
layoutParsingRequest.positionBlockFileStorageId(),
layoutParsingRequest.pageFileStorageId()))
.build();
}

View File

@ -5,6 +5,6 @@ import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration of the processor module; it declares no beans itself and only enables
 * component scanning. No base package is given on {@code @ComponentScan}.
 *
 * NOTE: this hunk shows the old and the new class header (rename) one after the other.
 */
@Configuration
@ComponentScan
public class LayoutparserServiceProcessorConfiguration {
public class LayoutParsingServiceProcessorConfiguration {
}

View File

@ -22,7 +22,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.TableOfConte
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -69,25 +69,25 @@ public class LayoutParsingStorageService {
/**
 * Stores the four parts of the given document data as JSON objects for the current tenant, each
 * under the storage id the request names for it: table of contents (structure id), atomic text
 * blocks (text block id), atomic position blocks (position block id) and pages (page id).
 *
 * NOTE: this hunk shows the old getXxx() calls followed by the new record accessor calls.
 *
 * @param layoutParsingRequest request supplying the target storage ids
 * @param documentData         data to store
 * @throws IOException declared by this method; presumably raised by the storage service — confirm
 */
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) throws IOException {
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.getStructureFileStorageId(), documentData.getTableOfContents());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.getTextBlockFileStorageId(), documentData.getAtomicTextBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.getPositionBlockFileStorageId(), documentData.getAtomicPositionBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.getPageFileStorageId(), documentData.getPages());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getTableOfContents());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getAtomicTextBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getAtomicPositionBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getPages());
}
public DocumentData readDocumentData(LayoutParsingRequest layoutParsingRequest) throws IOException {
PageData[] pageData = storageService.readJSONObject(TenantContext.getTenantId(), layoutParsingRequest.getPageFileStorageId(), PageData[].class);
PageData[] pageData = storageService.readJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), PageData[].class);
AtomicTextBlockData[] atomicTextBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
layoutParsingRequest.getTextBlockFileStorageId(),
layoutParsingRequest.textBlockFileStorageId(),
AtomicTextBlockData[].class);
AtomicPositionBlockData[] atomicPositionBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
layoutParsingRequest.getPositionBlockFileStorageId(),
layoutParsingRequest.positionBlockFileStorageId(),
AtomicPositionBlockData[].class);
TableOfContentsData tableOfContentsData = storageService.readJSONObject(TenantContext.getTenantId(),
layoutParsingRequest.getStructureFileStorageId(),
layoutParsingRequest.structureFileStorageId(),
TableOfContentsData.class);
return DocumentData.builder()

View File

@ -1,11 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.queue;
import lombok.Builder;
/**
 * Previous (removed) form of the finished event: a status code plus a message, built via the
 * Lombok builder.
 *
 * NOTE(review): only {@code @Builder} is present and the fields are package-private, so no
 * accessors are declared here — confirm that whatever serializes this type can see the fields.
 */
@Builder
public class LayoutParsingFinishedEvent {
// Numeric status; the call sites in this commit set 200, 400 or 500.
int status;
// Human-readable description of the outcome.
String message;
}

View File

@ -1,26 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.queue;
import java.util.Optional;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Previous (removed) form of the layout parsing request: a mutable Lombok data class with a
 * builder and both a no-args and an all-args constructor.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class LayoutParsingRequest {
// Storage id of the origin file to parse.
String originFileStorageId;
// Optional ids; callers test isPresent() on these. They are null when never set.
Optional<String> tablesFileStorageId;
Optional<String> imagesFileStorageId;
// Storage ids under which the parsing results are written.
String structureFileStorageId;
String textBlockFileStorageId;
String positionBlockFileStorageId;
String pageFileStorageId;
}

View File

@ -1,9 +1,9 @@
package com.knecon.fforesight.service.layoutparser.processor.queue;
import static com.knecon.fforesight.service.layoutparser.processor.queue.MessagingConfiguration.LAYOUTPARSING_REQUEST_QUEUE;
import java.io.IOException;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_QUEUE;
import org.springframework.amqp.AmqpRejectAndDontRequeueException;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
@ -12,9 +12,12 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@ -28,25 +31,27 @@ public class MessageHandler {
/**
 * Listener for the layout parsing request queue: deserializes the message body into a
 * LayoutParsingRequest, runs the layout parsing and publishes the resulting finished event.
 *
 * NOTE: this hunk shows removed and added lines interleaved (two listener annotations, the old
 * try/catch deserialization followed by the new single-line deserialization).
 *
 * @param message raw AMQP message whose body holds the serialized request
 */
@RabbitHandler
@RabbitListener(queues = LAYOUTPARSING_REQUEST_QUEUE)
@RabbitListener(queues = LAYOUT_PARSING_REQUEST_QUEUE)
// Checked exceptions (e.g. from readValue or the parsing service) are rethrown without being declared.
@SneakyThrows
public void receiveLayoutParsingRequest(Message message) {
LayoutParsingRequest layoutParsingRequest = null;
try {
layoutParsingRequest = objectMapper.readValue(message.getBody(), LayoutParsingRequest.class);
} catch (IOException e) {
sendLayoutParsingFinishedEvent(LayoutParsingFinishedEvent.builder().status(400).message("LayoutParsingRequest could not be deserialized!").build());
throw new RuntimeException(e);
// NOTE(review): deserialization happens before the redelivery check below, so a body that cannot
// be deserialized fails here rather than with AmqpRejectAndDontRequeueException — confirm that
// such messages still end up dead-lettered instead of being requeued.
LayoutParsingRequest layoutParsingRequest = objectMapper.readValue(message.getBody(), LayoutParsingRequest.class);
log.info("Layout parsing request received {}", layoutParsingRequest.identifier());
// A redelivered message means an earlier attempt did not complete; reject it without requeueing
// instead of parsing again.
if (message.getMessageProperties().isRedelivered()) {
throw new AmqpRejectAndDontRequeueException(String.format("Error during last layout parsing of request with identifier: %s, do not retry.",
layoutParsingRequest.identifier()));
}
LayoutParsingFinishedEvent layoutParsingFinishedEvent = layoutParsingService.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
sendLayoutParsingFinishedEvent(layoutParsingFinishedEvent);
log.info("Layout parsing finished {} in {} ms", layoutParsingRequest.identifier(), layoutParsingFinishedEvent.duration());
}
public void sendLayoutParsingFinishedEvent(LayoutParsingFinishedEvent layoutParsingFinishedEvent) {
try {
rabbitTemplate.convertAndSend(MessagingConfiguration.LAYOUTPARSING_FINISHED_EVENT_QUEUE, objectMapper.writeValueAsString(layoutParsingFinishedEvent));
rabbitTemplate.convertAndSend(LAYOUT_PARSING_FINISHED_EVENT_QUEUE, objectMapper.writeValueAsString(layoutParsingFinishedEvent));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}

View File

@ -1,5 +1,12 @@
package com.knecon.fforesight.service.layoutparser.processor.queue;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_DLQ;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_QUEUE;
import org.springframework.amqp.core.Queue;
import org.springframework.amqp.core.QueueBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import lombok.RequiredArgsConstructor;
@ -8,9 +15,26 @@ import lombok.RequiredArgsConstructor;
/**
 * Declares the queues used by the layout parsing service as beans: the request queue, the
 * finished event queue and the dead letter queue. All three are durable.
 *
 * NOTE: this hunk shows removed and added lines interleaved; the two LAYOUTPARSING_* constants
 * below are the removed declarations that the imported LayoutParsingQueueNames constants replace.
 */
@RequiredArgsConstructor
public class MessagingConfiguration {
// Request queue; dead-letters through the exchange named "" (RabbitMQ's default exchange) using
// the dead letter queue's name as routing key.
@Bean
public Queue layoutparsingRequestQueue() {
public static final String LAYOUTPARSING_REQUEST_QUEUE = "LAYOUTPARSING_REQUEST_QUEUE";
public static final String LAYOUTPARSING_FINISHED_EVENT_QUEUE = "LAYOUTPARSING_FINISHED_EVENT_QUEUE";
return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE)//
.withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build();
}
// Finished event queue; uses the same dead-letter settings as the request queue.
@Bean
public Queue layoutparsingResponseQueue() {
return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE)//
.withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build();
}
// Dead letter queue itself; declared without any extra arguments.
@Bean
public Queue layoutparsingDLQ() {
return QueueBuilder.durable(LAYOUT_PARSING_DLQ).build();
}
}

View File

@ -37,28 +37,14 @@
<artifactId>spring-boot-starter-amqp</artifactId>
<version>${spring.version}</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.9.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-test</artifactId>
<version>${spring.version}</version>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>6.0.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>5.3.0</version>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

View File

@ -9,13 +9,14 @@ import org.springframework.context.annotation.Import;
import com.amazonaws.services.s3.model.metrics.MetricsConfiguration;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.LayoutparserServiceProcessorConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.AsyncConfig;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.MultiTenancyMessagingConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.MultiTenancyWebConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
import com.knecon.fforesight.service.layoutparser.processor.queue.MessagingConfiguration;
@Import({MultiTenancyWebConfiguration.class, AsyncConfig.class, MultiTenancyMessagingConfiguration.class, MetricsConfiguration.class, LayoutparserServiceProcessorConfiguration.class, StorageAutoConfiguration.class})
@Import({MultiTenancyWebConfiguration.class, AsyncConfig.class, MultiTenancyMessagingConfiguration.class, MetricsConfiguration.class, LayoutParsingServiceProcessorConfiguration.class, StorageAutoConfiguration.class, MessagingConfiguration.class})
@EnableFeignClients(basePackageClasses = TenantsClient.class)
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
public class Application {

View File

@ -6,7 +6,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.DocumentData
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import lombok.SneakyThrows;

View File

@ -26,7 +26,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityIn
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.server.Application;
import lombok.SneakyThrows;

View File

@ -1,42 +0,0 @@
package com.knecon.fforesight.service.layoutparser.server.utils;
import java.util.Collections;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantResponse;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
/**
 * Stub {@link TenantsClient} registered as a Spring service: it creates nothing, reports no
 * tenants and returns {@code null} for single-tenant and deployment-key lookups.
 */
@Service
public class TestTenantsClient implements TenantsClient {

    /** Does nothing; the request is ignored. */
    @Override
    public void createTenant(TenantRequest tenantRequest) {
        // intentionally empty
    }

    /** @return always an empty list */
    @Override
    public List<TenantResponse> getTenants() {
        return Collections.emptyList();
    }

    /** @return always {@code null}, whatever the tenant id */
    @Override
    public TenantResponse getTenant(String tenantId) {
        return null;
    }

    /** @return always {@code null}, whatever the tenant id */
    @Override
    public JSONPrimitive<String> getDeploymentKey(String tenantId) {
        return null;
    }
}