Merge branch 'RED-9947' into 'master'
RED-9947: search term occurrences implementation for local bulk

Closes RED-9947

See merge request redactmanager/redaction-service!504
commit 03e321a824
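Editorial note, not part of the commit: the sketch below illustrates how the new flow is driven. A caller builds an `AnalyzeRequest` with `MessageType.SEARCH_BULK_LOCAL_TERM` and a `BulkLocalRequest` carrying the search term and an optional page filter; the occurrence positions come back as a `BulkLocalResponse` on the new `search_bulk_local_term_response_exchange`, routed by tenant id. The `dossierId`/`fileId` values are placeholders, and the `caseSensitive` builder method is an assumption inferred from `BulkLocalRequest.isCaseSensitive()` used further down.

```java
// Illustration only; dossierId/fileId are placeholders and the caseSensitive builder
// method is an assumption derived from BulkLocalRequest.isCaseSensitive().
BulkLocalRequest bulkLocalRequest = BulkLocalRequest.builder()
        .searchTerm("AAOEL")                 // term to locate in the document
        .pageNumbers(Set.of(9, 127, 147))    // optional: restrict the search to these pages
        .caseSensitive(false)
        .type("type").reason("reason").legalBasis("legalBasis")
        .build();

AnalyzeRequest request = AnalyzeRequest.builder()
        .dossierId(dossierId)
        .fileId(fileId)
        .build();
request.setMessageType(MessageType.SEARCH_BULK_LOCAL_TERM);
request.setBulkLocalRequest(bulkLocalRequest);

// RedactionMessageReceiver dispatches SEARCH_BULK_LOCAL_TERM to
// DocumentSearchService.searchTermOccurrences(request); the resulting BulkLocalResponse
// (one EntityPosition per occurrence) is published on
// QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE with the tenant id as routing key.
```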
@@ -4,7 +4,7 @@ plugins {
}

description = "redaction-service-api-v1"
val persistenceServiceVersion = "2.539.0"
val persistenceServiceVersion = "2.545.0"

dependencies {
    implementation("org.springframework:spring-web:6.0.12")
@@ -1,9 +1,5 @@
package com.iqser.red.service.redaction.v1.model;

import java.util.Queue;

import org.springframework.context.annotation.Bean;

public class QueueNames {

    public static final String REDACTION_REQUEST_QUEUE_PREFIX = "redaction_request";
@@ -13,6 +9,9 @@ public class QueueNames {
    public static final String REDACTION_RESPONSE_EXCHANGE = "redaction_response_exchange";
    public static final String REDACTION_DLQ = "redaction_error";

    public static final String SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE = "search_bulk_local_term_response_exchange";
    public static final String SEARCH_BULK_LOCAL_TERM_DLQ = "search_bulk_local_term_error";


    public static final String MIGRATION_REQUEST_QUEUE = "migrationQueue";
    public static final String MIGRATION_RESPONSE_QUEUE = "migrationResponseQueue";
@@ -16,7 +16,7 @@ val layoutParserVersion = "0.161.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.539.0"
val persistenceServiceVersion = "2.545.0"
val llmServiceVersion = "1.11.0"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"
@@ -46,7 +46,7 @@ dependencies {

    implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
    implementation("com.iqser.red.commons:storage-commons:2.45.0")
    implementation("com.knecon.fforesight:tenant-commons:0.29.0")
    implementation("com.knecon.fforesight:tenant-commons:0.30.0")
    implementation("com.knecon.fforesight:keycloak-commons:0.30.0") {
        exclude(group = "com.knecon.fforesight", module = "tenant-commons")
    }
@@ -0,0 +1,75 @@
package com.iqser.red.service.redaction.v1.server.model.document;

import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;

import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;

import lombok.NoArgsConstructor;

@NoArgsConstructor
public class ConsecutiveTextBlockCollector implements Collector<TextBlock, List<ConcatenatedTextBlock>, List<TextBlock>> {

    @Override
    public Supplier<List<ConcatenatedTextBlock>> supplier() {

        return LinkedList::new;
    }


    @Override
    public BiConsumer<List<ConcatenatedTextBlock>, TextBlock> accumulator() {

        return (existingList, textBlock) -> {
            if (existingList.isEmpty()) {
                ConcatenatedTextBlock ctb = ConcatenatedTextBlock.empty();
                ctb.concat(textBlock);
                existingList.add(ctb);
                return;
            }

            ConcatenatedTextBlock prevBlock = existingList.get(existingList.size() - 1);

            if (prevBlock.getTextRange().end() == textBlock.getTextRange().start()) {
                prevBlock.concat(textBlock);
            } else {
                ConcatenatedTextBlock ctb = ConcatenatedTextBlock.empty();
                ctb.concat(textBlock);
                existingList.add(ctb);
            }
        };
    }


    @Override
    public BinaryOperator<List<ConcatenatedTextBlock>> combiner() {

        return (list1, list2) -> Stream.concat(list1.stream(), list2.stream())
                .toList();
    }


    @Override
    public Function<List<ConcatenatedTextBlock>, List<TextBlock>> finisher() {

        return a -> a.stream()
                .map(tb -> (TextBlock) tb)
                .toList();
    }


    @Override
    public Set<Characteristics> characteristics() {

        return Set.of(Characteristics.IDENTITY_FINISH);
    }

}
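Editorial aside, not part of the diff: a minimal usage sketch of the collector above. It assumes the stream delivers AtomicTextBlocks in document order, which is how `SemanticNode.getTextBlocksByPageNumbers` feeds it later in this merge request; `orderedBlocks` is a placeholder name.

```java
// Illustration only: 'orderedBlocks' stands for AtomicTextBlocks in document order.
List<TextBlock> merged = orderedBlocks.stream()
        .collect(new ConsecutiveTextBlockCollector());
// Adjacent blocks (previous.getTextRange().end() == next.getTextRange().start()) are folded
// into one ConcatenatedTextBlock; any gap starts a new block.
```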
@@ -9,6 +9,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;

@@ -14,6 +14,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@@ -50,6 +51,23 @@ public interface SemanticNode {
    }


    /**
     * Searches all Nodes located underneath this Node in the DocumentTree that are found on the given pages.
     * Then consecutive AtomicTextBlocks are concatenated where possible and the list of the resulting TextBlocks is returned.
     *
     * @return List of TextBlocks containing all AtomicTextBlocks that are located under this Node on the given pages.
     */
    default List<TextBlock> getTextBlocksByPageNumbers(Set<Integer> pageNumbers) {

        return streamAllSubNodes().filter(SemanticNode::isLeaf)
                .map(SemanticNode::getTextBlock)
                .map(TextBlock::getAtomicTextBlocks)
                .flatMap(List::stream)
                .filter(atb -> pageNumbers.contains(atb.getPage().getNumber()))
                .collect(new ConsecutiveTextBlockCollector());
    }


    /**
     * Any Node maintains its own Set of Entities.
     * This Set contains all Entities whose TextRange intersects the TextRange of this node.
@@ -22,6 +22,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DocumentSearchService;
import com.iqser.red.service.redaction.v1.server.service.UnprocessedChangesService;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;

@@ -39,6 +40,7 @@ public class RedactionMessageReceiver {
    private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
    private final RulesClient rulesClient;
    private final UnprocessedChangesService unprocessedChangesService;
    private final DocumentSearchService documentSearchService;


    @SneakyThrows
@@ -116,6 +118,14 @@ public class RedactionMessageReceiver {
                log.info("Successful Imported Redactions Analysis Only dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
                log.info("-------------------------------------------------------------------------------------------------");
                break;
            case SEARCH_BULK_LOCAL_TERM:
                log.info("------------------------------ Search term occurrences for bulk local add ------------------------------------------");
                log.info("Starting term search for {} for file {} in dossier {}", analyzeRequest.getBulkLocalRequest().getSearchTerm(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
                documentSearchService.searchTermOccurrences(analyzeRequest);
                log.info("Successfully located all term occurrences for dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
                log.info("-------------------------------------------------------------------------------------------------");
                shouldRespond = false;
                break;
            default:
                throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
        }
@@ -0,0 +1,77 @@
package com.iqser.red.service.redaction.v1.server.service;

import java.util.List;
import java.util.Set;

import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;

import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.EntityPosition;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.redaction.v1.model.QueueNames;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility;
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;

import io.micrometer.observation.annotation.Observed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;

@Slf4j
@Service
@FieldDefaults(level = AccessLevel.PRIVATE)
@RequiredArgsConstructor
public class DocumentSearchService {

    final RabbitTemplate rabbitTemplate;
    final ObservedStorageService observedStorageService;
    final EntityFindingUtility entityFindingUtility;
    final EntityLogCreatorService entityLogCreatorService;


    @Observed(name = "DocumentSearchService", contextualName = "search-term-occurrences")
    public void searchTermOccurrences(AnalyzeRequest analyzeRequest) {

        BulkLocalRequest bulkLocalRequest = analyzeRequest.getBulkLocalRequest();
        String searchTerm = bulkLocalRequest.getSearchTerm();
        Set<Integer> pageNumbers = bulkLocalRequest.getPageNumbers();

        Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));

        List<TextEntity> possibleEntities;
        if (pageNumbers == null || pageNumbers.isEmpty()) {
            possibleEntities = entityFindingUtility.findAllPossibleEntitiesByValue(document, searchTerm, bulkLocalRequest.isCaseSensitive());
        } else {
            possibleEntities = entityFindingUtility.findAllPossibleEntitiesByValueOnPages(document, searchTerm, bulkLocalRequest.isCaseSensitive(), pageNumbers);
        }

        List<EntityPosition> entityPositions = possibleEntities.stream()
                .flatMap(entity -> entityLogCreatorService.toEntityLogEntries(entity, 0, "", "")
                        .stream())
                .map(entityLogEntry -> new EntityPosition(entityLogEntry.getPositions()))
                .toList();

        BulkLocalResponse bulkLocalResponse = BulkLocalResponse.builder()
                .dossierId(analyzeRequest.getDossierId())
                .fileId(analyzeRequest.getFileId())
                .searchTerm(searchTerm)
                .type(bulkLocalRequest.getType())
                .reason(bulkLocalRequest.getReason())
                .legalBasis(bulkLocalRequest.getLegalBasis())
                .section(bulkLocalRequest.getSection())
                .entityPositions(entityPositions)
                .build();

        rabbitTemplate.convertAndSend(QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE, TenantContext.getTenantId(), bulkLocalResponse);
    }

}
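Editorial aside, not part of the diff: the service above only publishes to the new exchange; how the response is consumed lies outside this merge request. A minimal, hypothetical Spring AMQP listener might look like the sketch below; the queue name, the direct-exchange assumption, and the fixed routing key are illustrative guesses, not declarations taken from this change set.

```java
// Hypothetical consumer sketch; queue name, exchange type and routing key are assumptions.
import org.springframework.amqp.core.ExchangeTypes;
import org.springframework.amqp.rabbit.annotation.Exchange;
import org.springframework.amqp.rabbit.annotation.Queue;
import org.springframework.amqp.rabbit.annotation.QueueBinding;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Component;

import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalResponse;
import com.iqser.red.service.redaction.v1.model.QueueNames;

import lombok.extern.slf4j.Slf4j;

@Slf4j
@Component
public class BulkLocalResponseListener {

    @RabbitListener(bindings = @QueueBinding(
            value = @Queue(name = "search_bulk_local_term_response", durable = "true"),
            exchange = @Exchange(name = QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE, type = ExchangeTypes.DIRECT),
            key = "my-tenant-id"))
    public void onBulkLocalResponse(BulkLocalResponse response) {
        // One EntityPosition per located occurrence of the search term.
        log.info("Received {} occurrences of '{}' for dossier {} file {}",
                response.getEntityPositions().size(), response.getSearchTerm(),
                response.getDossierId(), response.getFileId());
    }
}
```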
@@ -173,7 +173,7 @@ public class EntityLogCreatorService {
    }


    private List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, String dossierId, String fileId) {
    public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, String dossierId, String fileId) {

        List<EntityLogEntry> entityLogEntries = new ArrayList<>();

@@ -7,6 +7,8 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@@ -109,6 +111,7 @@ public class DocumentGraphMapper {
    }



    private Headline buildHeadline(Context context) {

        return Headline.builder().documentTree(context.documentTree).build();
@@ -25,8 +25,10 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplemen
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;

import lombok.extern.slf4j.Slf4j;
@@ -183,7 +185,10 @@ public class EntityFindingUtility {
                .map(String::trim)
                .collect(Collectors.toSet()), true);

        return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
        List<TextBlock> textBlocks = node.getTextBlocksByPageNumbers(pageNumbers);

        return textBlocks.stream()
                .flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange()))
                .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, node, Collections.emptySet()))
                .filter(Optional::isPresent)
                .map(Optional::get)
@@ -191,4 +196,45 @@ public class EntityFindingUtility {
                .collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
    }


    public List<TextEntity> findAllPossibleEntitiesByValueOnPages(Document document, String value, boolean caseSensitive, Set<Integer> pageNumbers) {

        if (!pageNumbers.stream()
                .allMatch(document::onPage)) {
            throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
                    document,
                    pageNumbers.stream()
                            .filter(pageNumber -> !document.onPage(pageNumber))
                            .toList(),
                    document.getPages()));
        }

        SearchImplementation searchImplementation = new SearchImplementation(value, !caseSensitive);

        List<TextBlock> textBlocks = document.getTextBlocksByPageNumbers(pageNumbers);

        return textBlocks.stream()
                .flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange())
                        .filter(textRange -> entityCreationService.isValidEntityTextRange(tb, textRange)))
                .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet()))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .distinct()
                .toList();
    }


    public List<TextEntity> findAllPossibleEntitiesByValue(Document document, String value, boolean caseSensitive) {

        SearchImplementation searchImplementation = new SearchImplementation(value, !caseSensitive);

        return searchImplementation.getBoundaries(document.getTextBlock(), document.getTextRange())
                .filter(textRange -> entityCreationService.isValidEntityTextRange(document.getTextBlock(), textRange))
                .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet()))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .distinct()
                .toList();
    }

}
@@ -55,6 +55,7 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DocumentSearchService;
import com.iqser.red.service.redaction.v1.server.service.UnprocessedChangesService;
import com.iqser.red.service.redaction.v1.server.service.websocket.RedisSyncedWebSocketService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@@ -156,6 +157,9 @@ public abstract class AbstractRedactionIntegrationTest {
    @Autowired
    protected UnprocessedChangesService unprocessedChangesService;

    @Autowired
    protected DocumentSearchService documentSearchService;

    @Autowired
    protected EntityLogDocumentRepository entityLogDocumentRepository;
    @Autowired
@@ -9,6 +9,7 @@ import org.springframework.core.io.ClassPathResource;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.MessageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@@ -26,9 +27,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
    @SneakyThrows
    public void testFile() {

        ObjectMapper om = new ObjectMapper();
        om.registerModule(new JavaTimeModule());

        var file = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".pdf").getInputStream();
        redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, FileType.ORIGIN, file);

@@ -86,4 +84,34 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest

    }


    @Test
    @SneakyThrows
    public void testSearchTermOccurrences() {

        var file = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".pdf").getInputStream();
        redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, FileType.ORIGIN, file);

        try {
            var nerData = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".ner.json").getInputStream();
            redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, FileType.NER_ENTITIES, nerData);
        } catch (Exception e) {
            log.warn("No NER File Provided");
            redactionServiceSettings.setNerServiceEnabled(false);
        }

        AnalyzeRequest ar = AnalyzeRequest.builder()
                .fileId(FILE_ID)
                .dossierId(DOSSIER_ID)
                .analysisNumber(1)
                .dossierTemplateId(DOSSIER_TEMPLATE_ID)
                .lastProcessed(OffsetDateTime.now())
                .fileAttributes(List.of())
                .build();

        ar.setMessageType(MessageType.SEARCH_BULK_LOCAL_TERM);
        redactionMessageReceiver.receiveAnalyzeRequest(ar, false);

    }

}
@@ -0,0 +1,162 @@
package com.iqser.red.service.redaction.v1.server.service.document;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.List;
import java.util.Set;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.SpyBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;

import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.EntityPosition;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.redaction.v1.model.QueueNames;
import com.iqser.red.service.redaction.v1.server.AbstractRedactionIntegrationTest;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.RedactionIntegrationTest;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;

import lombok.SneakyThrows;

@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class DocumentSearchServiceTest extends AbstractRedactionIntegrationTest {

    private static final String RULES = loadFromClassPath("drools/acceptance_rules.drl");

    @Configuration
    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
    @Import({LayoutParsingServiceProcessorConfiguration.class})
    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
    public static class RedactionIntegrationTestConfiguration {

        @Bean
        @Primary
        public StorageService inmemoryStorage() {

            return new FileSystemBackedStorageService(ObjectMapperFactory.create());
        }

    }

    @Captor
    ArgumentCaptor<BulkLocalResponse> captor;

    @SpyBean
    RabbitTemplate rabbitTemplate;


    @BeforeEach
    public void stubClients() {

        TenantContext.setTenantId("redaction");

        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(-1L);

        loadDictionaryForTest();
        loadTypeForTest();
        loadNerForTest();
        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, null, true)).thenReturn(getTemplateDictionaryTypeResponse());

        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, null, true)).thenReturn(getDossierDictionaryTypeResponse());
        mockDictionaryCalls(null);

        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
    }


    @Test
    @SneakyThrows
    public void testSearchTermOccurrences() {

        String pdfFile = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";

        AnalyzeRequest request = uploadFileToStorage(pdfFile);
        analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
        analyzeService.analyze(request);

        BulkLocalRequest bulkLocalRequest = BulkLocalRequest.builder().searchTerm("AAOEL").type("type").reason("reason").legalBasis("legalBasis").build();
        request.setBulkLocalRequest(bulkLocalRequest);
        documentSearchService.searchTermOccurrences(request);

        verify(rabbitTemplate).convertAndSend(eq(QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE), eq(TenantContext.getTenantId()), captor.capture());

        BulkLocalResponse response = captor.getValue();
        List<EntityPosition> entityPositions = response.getEntityPositions();
        assertEquals(entityPositions.size(), 5);
        assertEquals(response.getDossierId(), request.getDossierId());
        assertEquals(response.getFileId(), request.getFileId());
        assertEquals(response.getSearchTerm(), bulkLocalRequest.getSearchTerm());
        assertEquals(response.getType(), bulkLocalRequest.getType());
        assertEquals(response.getReason(), bulkLocalRequest.getReason());
        assertEquals(response.getLegalBasis(), bulkLocalRequest.getLegalBasis());
        assertEquals(response.getSection(), bulkLocalRequest.getSection());

    }

    @Test
    @SneakyThrows
    public void testSearchTermOccurrencesFilteredByPageNumbers() {

        String pdfFile = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";

        AnalyzeRequest request = uploadFileToStorage(pdfFile);
        analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
        analyzeService.analyze(request);

        BulkLocalRequest bulkLocalRequest = BulkLocalRequest.builder().searchTerm("SANCO/1426/2001").type("type").reason("reason").legalBasis("legalBasis").pageNumbers(Set.of(9, 127, 147)).build();
        request.setBulkLocalRequest(bulkLocalRequest);
        documentSearchService.searchTermOccurrences(request);

        verify(rabbitTemplate).convertAndSend(eq(QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE), eq(TenantContext.getTenantId()), captor.capture());

        BulkLocalResponse response = captor.getValue();
        List<EntityPosition> entityPositions = response.getEntityPositions();
        assertEquals(entityPositions.size(), 3);
        assertEquals(response.getDossierId(), request.getDossierId());
        assertEquals(response.getFileId(), request.getFileId());
        assertEquals(response.getSearchTerm(), bulkLocalRequest.getSearchTerm());
        assertEquals(response.getType(), bulkLocalRequest.getType());
        assertEquals(response.getReason(), bulkLocalRequest.getReason());
        assertEquals(response.getLegalBasis(), bulkLocalRequest.getLegalBasis());
        assertEquals(response.getSection(), bulkLocalRequest.getSection());

    }

}