Pull request #143: prepared redaction service for queue handling

Merge in RED/redaction-service from redaction-system-queue-prep to master

* commit 'ab3b30b28256a93fff79b642080ddc1d940992a9':
  prepared redaction service for queue handling
This commit is contained in:
Timo Bejan 2021-04-21 13:55:48 +02:00
commit b22e3d93c0
9 changed files with 149 additions and 7 deletions

View File

@ -11,6 +11,9 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class AnalyzeResult {
private String projectId;
private String fileId;
private long duration;
private int numberOfPages;
private boolean hasHints;
private boolean hasRequests;

View File

@ -83,6 +83,12 @@
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId>
<version>2.3.1.RELEASE</version>
</dependency>
<!-- test dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
@ -94,6 +100,12 @@
<artifactId>test-commons</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.amqp</groupId>
<artifactId>spring-rabbit-test</artifactId>
<version>2.3.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>

View File

@ -0,0 +1,35 @@
package com.iqser.red.service.redaction.v1.server.queue;
import lombok.RequiredArgsConstructor;
import org.springframework.amqp.core.Queue;
import org.springframework.amqp.core.QueueBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
@RequiredArgsConstructor
public class MessagingConfiguration {
public static final String REDACTION_QUEUE = "redactionQueue";
public static final String REDACTION_DQL = "redactionDQL";
@Bean
public Queue redactionQueue() {
return QueueBuilder.durable(REDACTION_QUEUE)
.withArgument("x-dead-letter-exchange", "")
.withArgument("x-dead-letter-routing-key", REDACTION_QUEUE)
.maxPriority(2)
.build();
}
@Bean
public Queue redactionDeadLetterQueue() {
return QueueBuilder.durable(REDACTION_DQL).build();
}
}

View File

@ -0,0 +1,46 @@
package com.iqser.red.service.redaction.v1.server.queue;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
@Slf4j
@Service
@RequiredArgsConstructor
public class RedactionMessageReceiver {
private final ObjectMapper objectMapper;
private final ReanalyzeService reanalyzeService;
@RabbitHandler
@RabbitListener(queues = REDACTION_QUEUE)
public void receiveAnalyzeRequest(String in) throws JsonProcessingException {
var analyzeRequest = objectMapper.readValue(in, AnalyzeRequest.class);
log.info("Processing analyze request: {}", analyzeRequest);
if (analyzeRequest.isReanalyseOnlyIfPossible()) {
reanalyzeService.reanalyze(analyzeRequest);
} else {
reanalyzeService.analyze(analyzeRequest);
}
log.info("Successfully analyzed {}", analyzeRequest);
// TODO respond to file-api
}
@RabbitHandler
@RabbitListener(queues = REDACTION_DQL)
public void receiveAnalyzeRequestDQL(String in) throws JsonProcessingException {
var analyzeRequest = objectMapper.readValue(in, AnalyzeRequest.class);
log.info("Failed to process analyze request: {}", analyzeRequest);
// TODO respond to file-api
}
}

View File

@ -9,7 +9,7 @@ import org.springframework.stereotype.Service;
@Service
public class AnalyzeResponseService {
public AnalyzeResult createAnalyzeResponse(int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
public AnalyzeResult createAnalyzeResponse(String projectId, String fileId, long duration, int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint);
boolean hasRequests = redactionLog.getRedactionLogEntry()
@ -31,6 +31,9 @@ public class AnalyzeResponseService {
.isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive"));
return AnalyzeResult.builder()
.projectId(projectId)
.fileId(fileId)
.duration(duration)
.numberOfPages(pageCount)
.hasHints(hasHints)
.hasRedactions(hasRedactions)

View File

@ -38,6 +38,7 @@ public class ReanalyzeService {
private final AnalyzeResponseService analyzeResponseService;
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var pageCount = 0;
Document classifiedDoc;
@ -70,12 +71,14 @@ public class ReanalyzeService {
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc.getSectionText()));
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());
return analyzeResponseService.createAnalyzeResponse(pageCount, redactionLog, changeLog);
long duration = System.currentTimeMillis() - startTime;
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, changeLog);
}
@SneakyThrows
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
var text = redactionStorageService.getText(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
@ -131,10 +134,7 @@ public class ReanalyzeService {
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
if (sectionsToReanalyse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) {
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
return analyzeResponseService.createAnalyzeResponse(text.getNumberOfPages(), redactionLog, changeLog);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
List<SectionText> reanalysisSections = new ArrayList<>();
@ -230,12 +230,20 @@ public class ReanalyzeService {
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage());
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
return finalizeAnalysis
(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime, RedactionLog redactionLog, Text text, DictionaryIncrement dictionaryIncrement) {
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
return analyzeResponseService.createAnalyzeResponse(text.getNumberOfPages(), redactionLog, changeLog);
long duration = System.currentTimeMillis() - startTime;
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text.getNumberOfPages(), redactionLog, changeLog);
}

View File

@ -113,6 +113,14 @@ public class SectionsBuilderService {
paragraphMap.computeIfAbsent(1, x -> new TreeSet<>()).add(paragraph);
}
// first page is always a paragraph, else we can't process pages 1..N,
// where N is the first found page with a paragraph
if (paragraphMap.get(1) == null) {
Paragraph paragraph = new Paragraph();
document.getParagraphs().add(paragraph);
paragraphMap.computeIfAbsent(1, x -> new TreeSet<>()).add(paragraph);
}
for (Page page : document.getPages()) {
for (PdfImage image : page.getImages()) {
SortedSet<Paragraph> paragraphsOnPage = paragraphMap.get(page.getPageNumber());

View File

@ -10,6 +10,20 @@ server:
spring:
profiles:
active: kubernetes
rabbitmq:
host: ${RABBITMQ_HOST:localhost}
port: ${RABBITMQ_PORT:5672}
username: ${RABBITMQ_USERNAME:user}
password: ${RABBITMQ_PASSWORD:rabbitmq}
listener:
simple:
acknowledge-mode: AUTO
concurrency: 2
retry:
enabled: true
max-attempts: 3
max-interval: 15000
prefetch: 1
management:
endpoint:

View File

@ -26,6 +26,7 @@ import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
@ -99,6 +100,9 @@ public class RedactionIntegrationTest {
@MockBean
private AmazonS3 amazonS3;
@MockBean
private RabbitTemplate rabbitTemplate;
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
@ -450,6 +454,15 @@ public class RedactionIntegrationTest {
}
@Test
public void test270Rotated() {
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
MemoryStats.printMemoryStats();
AnalyzeResult result = redactionController.analyze(request);
assertThat(result).isNotNull();
}
@Test
public void testLargeScannedFileOOM() {
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");