From 6ee6c00746390397e1cd4efb4e828bfc37efc45d Mon Sep 17 00:00:00 2001 From: deiflaender Date: Thu, 16 Feb 2023 09:44:01 +0100 Subject: [PATCH] RED-6169: Use custom analyser for filename and changed to prefixQuery for filename --- .../server/service/IndexCreatorService.java | 2 ++ .../v1/server/service/SearchService.java | 4 +-- .../src/main/resources/index/mapping.json | 2 +- .../src/main/resources/index/settings.json | 24 ++++++++++++++ .../search/v1/server/service/SearchTest.java | 33 +++++++++++++++++++ 5 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 search-service-v1/search-service-server-v1/src/main/resources/index/settings.json diff --git a/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/IndexCreatorService.java b/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/IndexCreatorService.java index 087ee01..8fa2122 100644 --- a/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/IndexCreatorService.java +++ b/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/IndexCreatorService.java @@ -43,8 +43,10 @@ public class IndexCreatorService { public void createIndex() { String indexMapping = ResourceLoader.load("index/mapping.json"); + String indexSettings = ResourceLoader.load("index/settings.json"); Settings.Builder settingsBuilder = Settings.builder() + .loadFromSource(indexSettings, XContentType.JSON) .put("number_of_shards", settings.getNumberOfShards()) .put("number_of_replicas", settings.getNumberOfReplicas()) .put("index.mapping.nested_objects.limit", settings.getNumberOfNestedObjectLimit()); diff --git a/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/SearchService.java 
b/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/SearchService.java index de757cb..4638cde 100644 --- a/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/SearchService.java +++ b/search-service-v1/search-service-server-v1/src/main/java/com/iqser/red/service/search/v1/server/service/SearchService.java @@ -122,7 +122,7 @@ public class SearchService { for (String must : query.getMusts()) { QueryBuilder textPhraseQuery = QueryBuilders.matchPhraseQuery("sections.text", must.toLowerCase(Locale.ROOT)).queryName(must); - QueryBuilder filenamePhraseQuery = QueryBuilders.matchPhraseQuery("filename", must.toLowerCase(Locale.ROOT)).queryName("filename." + must); + QueryBuilder filenamePhraseQuery = QueryBuilders.matchPhrasePrefixQuery("filename", must.toLowerCase(Locale.ROOT)).queryName("filename." + must); QueryBuilder fileAttributesPhraseQuery = QueryBuilders.matchPhraseQuery("fileAttributes.value", must.toLowerCase(Locale.ROOT)).queryName("fileAttributes." + must); QueryBuilder filenameOrTextMustQuery = QueryBuilders.boolQuery().should(textPhraseQuery).should(filenamePhraseQuery).should(fileAttributesPhraseQuery); @@ -132,7 +132,7 @@ public class SearchService { for (String should : query.getShoulds()) { QueryBuilder textTermQuery = QueryBuilders.matchPhraseQuery("sections.text", should.toLowerCase(Locale.ROOT)).queryName(should); - QueryBuilder filenameTermQuery = QueryBuilders.matchPhraseQuery("filename", should.toLowerCase(Locale.ROOT)).queryName("filename." + should); + QueryBuilder filenameTermQuery = QueryBuilders.matchPhrasePrefixQuery("filename", should.toLowerCase(Locale.ROOT)).queryName("filename." + should); QueryBuilder fileAttributesPhraseQuery = QueryBuilders.matchPhraseQuery("fileAttributes.value", should.toLowerCase(Locale.ROOT)).queryName("fileAttributes." 
+ should); entireQuery.should(textTermQuery); entireQuery.should(filenameTermQuery); diff --git a/search-service-v1/search-service-server-v1/src/main/resources/index/mapping.json b/search-service-v1/search-service-server-v1/src/main/resources/index/mapping.json index ea8bc51..4d00505 100644 --- a/search-service-v1/search-service-server-v1/src/main/resources/index/mapping.json +++ b/search-service-v1/search-service-server-v1/src/main/resources/index/mapping.json @@ -12,7 +12,7 @@ "filename": { "type": "text", "term_vector": "with_positions_offsets", - "analyzer": "simple" + "analyzer": "filename_analyzer" }, "date": { "type": "date" diff --git a/search-service-v1/search-service-server-v1/src/main/resources/index/settings.json b/search-service-v1/search-service-server-v1/src/main/resources/index/settings.json new file mode 100644 index 0000000..940859f --- /dev/null +++ b/search-service-v1/search-service-server-v1/src/main/resources/index/settings.json @@ -0,0 +1,24 @@ +{ + "analysis": { + "tokenizer": { + "filename_tokenizer": { + "type": "char_group", + "tokenize_on_chars": [ + "whitespace", + "punctuation", + "symbol" + ] + } + }, + "analyzer": { + "filename_analyzer": { + "type": "custom", + "tokenizer": "filename_tokenizer", + "filter": [ + "lowercase", + "trim" + ] + } + } + } +} \ No newline at end of file diff --git a/search-service-v1/search-service-server-v1/src/test/java/com/iqser/red/service/search/v1/server/service/SearchTest.java b/search-service-v1/search-service-server-v1/src/test/java/com/iqser/red/service/search/v1/server/service/SearchTest.java index 0a8b1cb..9e3462e 100644 --- a/search-service-v1/search-service-server-v1/src/test/java/com/iqser/red/service/search/v1/server/service/SearchTest.java +++ b/search-service-v1/search-service-server-v1/src/test/java/com/iqser/red/service/search/v1/server/service/SearchTest.java @@ -730,6 +730,39 @@ public class SearchTest extends AbstractElasticsearchIntegrationTest { } + @Test + public void 
testFilenameWithNumbersExactMatch() throws IOException { + + ClassPathResource textResource = new ClassPathResource("files/Text.json"); + Text text = objectMapper.readValue(textResource.getInputStream(), Text.class); + String fileName = "116 IDD0000261308.pdf"; + String filename2 = "115 IDD0000261308.pdf"; + String searchString = "\"116 IDD\""; + documentIndexService.indexDocument("template1", "dossierId1", "fileId1", fileName, text, "UserId", false, false, WorkflowStatus.NEW, Map.of("F1Key", "F1Value")); + documentIndexService.indexDocument("template1", "dossierId1", "fileId2", filename2, text, "UserId", false, false, WorkflowStatus.NEW, Map.of("F1Key", "F1Value")); + + SearchResult result = searchService.search(searchString, null, List.of("dossierId1"), null, null, false, false, null, null, 0, 10, true); + + assertThat(result.getMatchedDocuments().size()).isEqualTo(1); + assertThat(result.getMatchedDocuments().get(0).getMatchedTerms().size()).isGreaterThan(0); + } + + @Test + public void testFilenameWithNumbersMatch() throws IOException { + + ClassPathResource textResource = new ClassPathResource("files/Text.json"); + Text text = objectMapper.readValue(textResource.getInputStream(), Text.class); + String fileName = "VV-733382.pdf"; + String searchString = "733382"; + documentIndexService.indexDocument("template1", "dossierId1", "fileId1", fileName, text, "UserId", false, false, WorkflowStatus.NEW, Map.of("F1Key", "F1Value")); + + SearchResult result = searchService.search(searchString, null, List.of("dossierId1"), null, null, false, false, null, null, 0, 10, true); + + assertThat(result.getMatchedDocuments().size()).isEqualTo(1); + assertThat(result.getMatchedDocuments().get(0).getMatchedTerms().size()).isGreaterThan(0); + } + + /* * Filename contains only one uppercase word and searchString contains complete filename without ending */