Merge branch 'RED-9947' into 'master'

RED-9947: search term occurrences implementation for local bulk

Closes RED-9947

See merge request redactmanager/redaction-service!504
This commit is contained in:
Maverick Studer 2024-09-04 11:51:39 +02:00
commit 03e321a824
14 changed files with 435 additions and 12 deletions

View File

@ -4,7 +4,7 @@ plugins {
}
description = "redaction-service-api-v1"
val persistenceServiceVersion = "2.539.0"
val persistenceServiceVersion = "2.545.0"
dependencies {
implementation("org.springframework:spring-web:6.0.12")

View File

@ -1,9 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
import java.util.Queue;
import org.springframework.context.annotation.Bean;
public class QueueNames {
public static final String REDACTION_REQUEST_QUEUE_PREFIX = "redaction_request";
@ -13,6 +9,9 @@ public class QueueNames {
public static final String REDACTION_RESPONSE_EXCHANGE = "redaction_response_exchange";
public static final String REDACTION_DLQ = "redaction_error";
public static final String SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE = "search_bulk_local_term_response_exchange";
public static final String SEARCH_BULK_LOCAL_TERM_DLQ = "search_bulk_local_term_error";
public static final String MIGRATION_REQUEST_QUEUE = "migrationQueue";
public static final String MIGRATION_RESPONSE_QUEUE = "migrationResponseQueue";

View File

@ -16,7 +16,7 @@ val layoutParserVersion = "0.161.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.539.0"
val persistenceServiceVersion = "2.545.0"
val llmServiceVersion = "1.11.0"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"
@ -46,7 +46,7 @@ dependencies {
implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
implementation("com.iqser.red.commons:storage-commons:2.45.0")
implementation("com.knecon.fforesight:tenant-commons:0.29.0")
implementation("com.knecon.fforesight:tenant-commons:0.30.0")
implementation("com.knecon.fforesight:keycloak-commons:0.30.0") {
exclude(group = "com.knecon.fforesight", module = "tenant-commons")
}

View File

@ -0,0 +1,75 @@
package com.iqser.red.service.redaction.v1.server.model.document;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.NoArgsConstructor;
/**
 * Collector that folds a stream of TextBlocks into a list of ConcatenatedTextBlocks,
 * merging blocks whose text ranges are directly adjacent (previous end == next start).
 * Encounter order is preserved; a gap between ranges starts a new ConcatenatedTextBlock.
 */
@NoArgsConstructor
public class ConsecutiveTextBlockCollector implements Collector<TextBlock, List<ConcatenatedTextBlock>, List<TextBlock>> {
@Override
public Supplier<List<ConcatenatedTextBlock>> supplier() {
return LinkedList::new;
}
@Override
public BiConsumer<List<ConcatenatedTextBlock>, TextBlock> accumulator() {
return (blocks, textBlock) -> {
if (blocks.isEmpty()) {
blocks.add(newBlockFrom(textBlock));
return;
}
ConcatenatedTextBlock previous = blocks.get(blocks.size() - 1);
if (previous.getTextRange().end() == textBlock.getTextRange().start()) {
// Directly adjacent: extend the previous concatenated block.
previous.concat(textBlock);
} else {
// Gap between ranges: start a new concatenated block.
blocks.add(newBlockFrom(textBlock));
}
};
}
@Override
public BinaryOperator<List<ConcatenatedTextBlock>> combiner() {
// Fix: the previous implementation returned an immutable list
// (Stream.concat(...).toList()), breaking the Collector contract for parallel
// streams, and never merged blocks adjacent across the split boundary.
// Mutate and return the left container instead.
return (left, right) -> {
if (left.isEmpty()) {
return right;
}
if (!right.isEmpty()) {
ConcatenatedTextBlock last = left.get(left.size() - 1);
ConcatenatedTextBlock first = right.get(0);
if (last.getTextRange().end() == first.getTextRange().start()) {
// Segments split exactly between two adjacent blocks: merge them.
last.concat(first);
left.addAll(right.subList(1, right.size()));
} else {
left.addAll(right);
}
}
return left;
};
}
@Override
public Function<List<ConcatenatedTextBlock>, List<TextBlock>> finisher() {
// Pure element-type widening; never invoked because of IDENTITY_FINISH below.
return blocks -> blocks.stream()
.map(TextBlock.class::cast)
.toList();
}
@Override
public Set<Characteristics> characteristics() {
// IDENTITY_FINISH is safe: the finisher only upcasts, so the accumulation
// container can be returned directly (erasure makes the cast a no-op).
return Set.of(Characteristics.IDENTITY_FINISH);
}
// Starts a fresh ConcatenatedTextBlock seeded with the given TextBlock.
private static ConcatenatedTextBlock newBlockFrom(TextBlock textBlock) {
ConcatenatedTextBlock block = ConcatenatedTextBlock.empty();
block.concat(textBlock);
return block;
}
}

View File

@ -9,6 +9,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;

View File

@ -14,6 +14,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@ -50,6 +51,23 @@ public interface SemanticNode {
}
/**
 * Collects all AtomicTextBlocks located underneath this Node in the DocumentTree
 * that lie on one of the given pages, concatenating consecutive blocks where possible.
 *
 * @param pageNumbers pages the returned blocks must be located on
 * @return List of TextBlocks containing all matching AtomicTextBlocks under this Node
 */
default List<TextBlock> getTextBlocksByPageNumbers(Set<Integer> pageNumbers) {
return streamAllSubNodes()
.filter(SemanticNode::isLeaf)
.flatMap(leaf -> leaf.getTextBlock().getAtomicTextBlocks().stream())
.filter(atomicBlock -> pageNumbers.contains(atomicBlock.getPage().getNumber()))
.collect(new ConsecutiveTextBlockCollector());
}
/**
* Any Node maintains its own Set of Entities.
* This Set contains all Entities whose TextRange intersects the TextRange of this node.

View File

@ -22,6 +22,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DocumentSearchService;
import com.iqser.red.service.redaction.v1.server.service.UnprocessedChangesService;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;
@ -39,6 +40,7 @@ public class RedactionMessageReceiver {
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
private final RulesClient rulesClient;
private final UnprocessedChangesService unprocessedChangesService;
private final DocumentSearchService documentSearchService;
@SneakyThrows
@ -116,6 +118,14 @@ public class RedactionMessageReceiver {
log.info("Successful Imported Redactions Analysis Only dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("-------------------------------------------------------------------------------------------------");
break;
case SEARCH_BULK_LOCAL_TERM:
log.info("------------------------------Search Term occurrences for bulk local add ------------------------------------------");
log.info("Starting term search for {} for file {} in dossier {}", analyzeRequest.getBulkLocalRequest().getSearchTerm(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
documentSearchService.searchTermOccurrences(analyzeRequest);
log.info("Successfully located all term occurrences dossier {} file {} ", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("-------------------------------------------------------------------------------------------------");
shouldRespond = false;
break;
default:
throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
}

View File

@ -0,0 +1,77 @@
package com.iqser.red.service.redaction.v1.server.service;
import java.util.List;
import java.util.Set;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.EntityPosition;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.redaction.v1.model.QueueNames;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility;
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;
import io.micrometer.observation.annotation.Observed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@FieldDefaults(level = AccessLevel.PRIVATE)
@RequiredArgsConstructor
public class DocumentSearchService {
final RabbitTemplate rabbitTemplate;
final ObservedStorageService observedStorageService;
final EntityFindingUtility entityFindingUtility;
final EntityLogCreatorService entityLogCreatorService;
@Observed(name = "DocumentSearchService", contextualName = "search-term-occurrences")
public void searchTermOccurrences(AnalyzeRequest analyzeRequest) {
BulkLocalRequest bulkLocalRequest = analyzeRequest.getBulkLocalRequest();
String searchTerm = bulkLocalRequest.getSearchTerm();
Set<Integer> pageNumbers = bulkLocalRequest.getPageNumbers();
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
List<TextEntity> possibleEntities;
if (pageNumbers == null || pageNumbers.isEmpty()) {
possibleEntities = entityFindingUtility.findAllPossibleEntitiesByValue(document, searchTerm, bulkLocalRequest.isCaseSensitive());
} else {
possibleEntities = entityFindingUtility.findAllPossibleEntitiesByValueOnPages(document, searchTerm, bulkLocalRequest.isCaseSensitive(), pageNumbers);
}
List<EntityPosition> entityPositions = possibleEntities.stream()
.flatMap(entity -> entityLogCreatorService.toEntityLogEntries(entity, 0, "", "")
.stream())
.map(entityLogEntry -> new EntityPosition(entityLogEntry.getPositions()))
.toList();
BulkLocalResponse bulkLocalResponse = BulkLocalResponse.builder()
.dossierId(analyzeRequest.getDossierId())
.fileId(analyzeRequest.getFileId())
.searchTerm(searchTerm)
.type(bulkLocalRequest.getType())
.reason(bulkLocalRequest.getReason())
.legalBasis(bulkLocalRequest.getLegalBasis())
.section(bulkLocalRequest.getSection())
.entityPositions(entityPositions)
.build();
rabbitTemplate.convertAndSend(QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE, TenantContext.getTenantId(), bulkLocalResponse);
}
}

View File

@ -173,7 +173,7 @@ public class EntityLogCreatorService {
}
private List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, String dossierId, String fileId) {
public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, String dossierId, String fileId) {
List<EntityLogEntry> entityLogEntries = new ArrayList<>();

View File

@ -7,6 +7,8 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@ -109,6 +111,7 @@ public class DocumentGraphMapper {
}
private Headline buildHeadline(Context context) {
return Headline.builder().documentTree(context.documentTree).build();

View File

@ -25,8 +25,10 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplemen
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import lombok.extern.slf4j.Slf4j;
@ -183,7 +185,10 @@ public class EntityFindingUtility {
.map(String::trim)
.collect(Collectors.toSet()), true);
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
List<TextBlock> textBlocks = node.getTextBlocksByPageNumbers(pageNumbers);
return textBlocks.stream()
.flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange()))
.map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, node, Collections.emptySet()))
.filter(Optional::isPresent)
.map(Optional::get)
@ -191,4 +196,45 @@ public class EntityFindingUtility {
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
/**
 * Finds all possible entities matching the given value on the given pages of the document.
 *
 * @param document      document graph to search
 * @param value         search term
 * @param caseSensitive whether matching is case sensitive
 * @param pageNumbers   pages to restrict the search to; must all exist in the document
 * @return distinct candidate TextEntities found on the requested pages
 * @throws IllegalArgumentException if any requested page is not part of the document
 */
public List<TextEntity> findAllPossibleEntitiesByValueOnPages(Document document, String value, boolean caseSensitive, Set<Integer> pageNumbers) {
// Collect the missing pages in a single pass (the previous code traversed
// pageNumbers twice: once for allMatch, once more to build the message).
List<Integer> missingPages = pageNumbers.stream()
.filter(pageNumber -> !document.onPage(pageNumber))
.toList();
if (!missingPages.isEmpty()) {
throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
document,
missingPages,
document.getPages()));
}
// SearchImplementation's second argument is "ignore case", hence the negation.
SearchImplementation searchImplementation = new SearchImplementation(value, !caseSensitive);
List<TextBlock> textBlocks = document.getTextBlocksByPageNumbers(pageNumbers);
return textBlocks.stream()
.flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange())
.filter(textRange -> entityCreationService.isValidEntityTextRange(tb, textRange)))
.map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet()))
.filter(Optional::isPresent)
.map(Optional::get)
.distinct()
.toList();
}
/**
 * Finds all possible entities matching the given value anywhere in the document.
 *
 * @param document      document graph to search
 * @param value         search term
 * @param caseSensitive whether matching is case sensitive
 * @return distinct candidate TextEntities matching the value
 */
public List<TextEntity> findAllPossibleEntitiesByValue(Document document, String value, boolean caseSensitive) {
// SearchImplementation's second argument is "ignore case", hence the negation.
boolean ignoreCase = !caseSensitive;
SearchImplementation search = new SearchImplementation(value, ignoreCase);
TextBlock rootBlock = document.getTextBlock();
return search.getBoundaries(rootBlock, document.getTextRange())
.filter(candidate -> entityCreationService.isValidEntityTextRange(rootBlock, candidate))
.map(candidate -> entityCreationService.byTextRangeWithEngine(candidate, "temp", EntityType.ENTITY, document, Collections.emptySet()))
.filter(Optional::isPresent)
.map(Optional::get)
.distinct()
.toList();
}
}

View File

@ -55,6 +55,7 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DocumentSearchService;
import com.iqser.red.service.redaction.v1.server.service.UnprocessedChangesService;
import com.iqser.red.service.redaction.v1.server.service.websocket.RedisSyncedWebSocketService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -156,6 +157,9 @@ public abstract class AbstractRedactionIntegrationTest {
@Autowired
protected UnprocessedChangesService unprocessedChangesService;
@Autowired
protected DocumentSearchService documentSearchService;
@Autowired
protected EntityLogDocumentRepository entityLogDocumentRepository;
@Autowired

View File

@ -9,6 +9,7 @@ import org.springframework.core.io.ClassPathResource;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.MessageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -26,9 +27,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
@SneakyThrows
public void testFile() {
ObjectMapper om = new ObjectMapper();
om.registerModule(new JavaTimeModule());
var file = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".pdf").getInputStream();
redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, FileType.ORIGIN, file);
@ -86,4 +84,34 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
}
// Exercises the SEARCH_BULK_LOCAL_TERM message path end to end: stores the origin
// PDF (and NER data when available), then dispatches an AnalyzeRequest through the
// message receiver.
@Test
@SneakyThrows
public void testSearchTermOccurrences() {
var originPdf = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".pdf").getInputStream();
redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, FileType.ORIGIN, originPdf);
try {
var nerEntities = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".ner.json").getInputStream();
redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, FileType.NER_ENTITIES, nerEntities);
} catch (Exception e) {
// Best effort: run without NER when no fixture is provided for this file.
log.warn("No NER File Provided");
redactionServiceSettings.setNerServiceEnabled(false);
}
AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
.fileId(FILE_ID)
.dossierId(DOSSIER_ID)
.dossierTemplateId(DOSSIER_TEMPLATE_ID)
.analysisNumber(1)
.lastProcessed(OffsetDateTime.now())
.fileAttributes(List.of())
.build();
analyzeRequest.setMessageType(MessageType.SEARCH_BULK_LOCAL_TERM);
redactionMessageReceiver.receiveAnalyzeRequest(analyzeRequest, false);
}
}

View File

@ -0,0 +1,162 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.SpyBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.BulkLocalResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.EntityPosition;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.redaction.v1.model.QueueNames;
import com.iqser.red.service.redaction.v1.server.AbstractRedactionIntegrationTest;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.RedactionIntegrationTest;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
/**
 * Integration tests for DocumentSearchService: verifies that term occurrences are
 * located (optionally restricted to pages) and published as a BulkLocalResponse.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class DocumentSearchServiceTest extends AbstractRedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/acceptance_rules.drl");
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@Import({LayoutParsingServiceProcessorConfiguration.class})
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
public static class RedactionIntegrationTestConfiguration {
// File-system backed storage so the test runs without an external object store.
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService(ObjectMapperFactory.create());
}
}
@Captor
ArgumentCaptor<BulkLocalResponse> captor;
@SpyBean
RabbitTemplate rabbitTemplate;
@BeforeEach
public void stubClients() {
TenantContext.setTenantId("redaction");
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(-1L);
loadDictionaryForTest();
loadTypeForTest();
loadNerForTest();
// Fix: this stub was previously declared twice; once is enough.
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, null, true)).thenReturn(getTemplateDictionaryTypeResponse());
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, null, true)).thenReturn(getDossierDictionaryTypeResponse());
mockDictionaryCalls(null);
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
@Test
@SneakyThrows
public void testSearchTermOccurrences() {
String pdfFile = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
AnalyzeRequest request = uploadFileToStorage(pdfFile);
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
analyzeService.analyze(request);
BulkLocalRequest bulkLocalRequest = BulkLocalRequest.builder().searchTerm("AAOEL").type("type").reason("reason").legalBasis("legalBasis").build();
request.setBulkLocalRequest(bulkLocalRequest);
documentSearchService.searchTermOccurrences(request);
BulkLocalResponse response = capturePublishedResponse();
// Fix: JUnit's assertEquals takes (expected, actual) — the original had them
// reversed, which produces misleading failure messages.
assertEquals(5, response.getEntityPositions().size());
assertResponseEchoesRequest(response, request, bulkLocalRequest);
}
@Test
@SneakyThrows
public void testSearchTermOccurrencesFilteredByPageNumbers() {
String pdfFile = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
AnalyzeRequest request = uploadFileToStorage(pdfFile);
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
analyzeService.analyze(request);
BulkLocalRequest bulkLocalRequest = BulkLocalRequest.builder().searchTerm("SANCO/1426/2001").type("type").reason("reason").legalBasis("legalBasis").pageNumbers(Set.of(9, 127, 147)).build();
request.setBulkLocalRequest(bulkLocalRequest);
documentSearchService.searchTermOccurrences(request);
BulkLocalResponse response = capturePublishedResponse();
assertEquals(3, response.getEntityPositions().size());
assertResponseEchoesRequest(response, request, bulkLocalRequest);
}
// Verifies the response was published to the expected exchange with the tenant
// routing key, and returns the captured payload.
private BulkLocalResponse capturePublishedResponse() {
verify(rabbitTemplate).convertAndSend(eq(QueueNames.SEARCH_TERM_OCCURRENCES_RESPONSE_EXCHANGE), eq(TenantContext.getTenantId()), captor.capture());
return captor.getValue();
}
// Shared assertions: the response must echo the identifiers and metadata of the request.
private void assertResponseEchoesRequest(BulkLocalResponse response, AnalyzeRequest request, BulkLocalRequest bulkLocalRequest) {
assertEquals(request.getDossierId(), response.getDossierId());
assertEquals(request.getFileId(), response.getFileId());
assertEquals(bulkLocalRequest.getSearchTerm(), response.getSearchTerm());
assertEquals(bulkLocalRequest.getType(), response.getType());
assertEquals(bulkLocalRequest.getReason(), response.getReason());
assertEquals(bulkLocalRequest.getLegalBasis(), response.getLegalBasis());
assertEquals(bulkLocalRequest.getSection(), response.getSection());
}
}