RED-6169: Use custom analyser for filename and change to prefixQuery for filename

This commit is contained in:
deiflaender 2023-02-16 09:44:01 +01:00
parent 55b08215d4
commit 6ee6c00746
5 changed files with 62 additions and 3 deletions

View File

@ -43,8 +43,10 @@ public class IndexCreatorService {
public void createIndex() {
String indexMapping = ResourceLoader.load("index/mapping.json");
String indexSettings = ResourceLoader.load("index/settings.json");
Settings.Builder settingsBuilder = Settings.builder()
.loadFromSource(indexSettings, XContentType.JSON)
.put("number_of_shards", settings.getNumberOfShards())
.put("number_of_replicas", settings.getNumberOfReplicas())
.put("index.mapping.nested_objects.limit", settings.getNumberOfNestedObjectLimit());

View File

@ -122,7 +122,7 @@ public class SearchService {
for (String must : query.getMusts()) {
QueryBuilder textPhraseQuery = QueryBuilders.matchPhraseQuery("sections.text", must.toLowerCase(Locale.ROOT)).queryName(must);
QueryBuilder filenamePhraseQuery = QueryBuilders.matchPhraseQuery("filename", must.toLowerCase(Locale.ROOT)).queryName("filename." + must);
QueryBuilder filenamePhraseQuery = QueryBuilders.matchPhrasePrefixQuery("filename", must.toLowerCase(Locale.ROOT)).queryName("filename." + must);
QueryBuilder fileAttributesPhraseQuery = QueryBuilders.matchPhraseQuery("fileAttributes.value", must.toLowerCase(Locale.ROOT)).queryName("fileAttributes." + must);
QueryBuilder filenameOrTextMustQuery = QueryBuilders.boolQuery().should(textPhraseQuery).should(filenamePhraseQuery).should(fileAttributesPhraseQuery);
@ -132,7 +132,7 @@ public class SearchService {
for (String should : query.getShoulds()) {
QueryBuilder textTermQuery = QueryBuilders.matchPhraseQuery("sections.text", should.toLowerCase(Locale.ROOT)).queryName(should);
QueryBuilder filenameTermQuery = QueryBuilders.matchPhraseQuery("filename", should.toLowerCase(Locale.ROOT)).queryName("filename." + should);
QueryBuilder filenameTermQuery = QueryBuilders.matchPhrasePrefixQuery("filename", should.toLowerCase(Locale.ROOT)).queryName("filename." + should);
QueryBuilder fileAttributesPhraseQuery = QueryBuilders.matchPhraseQuery("fileAttributes.value", should.toLowerCase(Locale.ROOT)).queryName("fileAttributes." + should);
entireQuery.should(textTermQuery);
entireQuery.should(filenameTermQuery);

View File

@ -12,7 +12,7 @@
"filename": {
"type": "text",
"term_vector": "with_positions_offsets",
"analyzer": "simple"
"analyzer": "filename_analyzer"
},
"date": {
"type": "date"

View File

@ -0,0 +1,24 @@
{
"analysis": {
"tokenizer": {
"filename_tokenizer": {
"type": "char_group",
"tokenize_on_chars": [
"whitespace",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"filename_analyzer": {
"type": "custom",
"tokenizer": "filename_tokenizer",
"filter": [
"lowercase",
"trim"
]
}
}
}
}

View File

@ -730,6 +730,39 @@ public class SearchTest extends AbstractElasticsearchIntegrationTest {
}
/*
 * Two indexed filenames differ only in their leading number and searchString is a quoted phrase
 * that is the beginning of one of them; exactly one document with at least one matched term is expected
 */
@Test
public void testFilenameWithNumbersExactMatch() throws IOException {

    String firstFilename = "116 IDD0000261308.pdf";
    String secondFilename = "115 IDD0000261308.pdf";
    String quotedPhrase = "\"116 IDD\"";

    // Both documents share the same text content, so only the filename distinguishes them.
    Text sharedText = objectMapper.readValue(new ClassPathResource("files/Text.json").getInputStream(), Text.class);

    documentIndexService.indexDocument("template1", "dossierId1", "fileId1", firstFilename, sharedText, "UserId", false, false, WorkflowStatus.NEW, Map.of("F1Key", "F1Value"));
    documentIndexService.indexDocument("template1", "dossierId1", "fileId2", secondFilename, sharedText, "UserId", false, false, WorkflowStatus.NEW, Map.of("F1Key", "F1Value"));

    SearchResult searchResult = searchService.search(quotedPhrase, null, List.of("dossierId1"), null, null, false, false, null, null, 0, 10, true);

    assertThat(searchResult.getMatchedDocuments().size()).isEqualTo(1);
    assertThat(searchResult.getMatchedDocuments().get(0).getMatchedTerms().size()).isGreaterThan(0);
}
/*
 * Filename is "VV-733382.pdf" and searchString contains only the number following the hyphen;
 * exactly one document with at least one matched term is expected
 */
@Test
public void testFilenameWithNumbersMatch() throws IOException {

    String indexedFilename = "VV-733382.pdf";
    String numberOnly = "733382";

    Text documentText = objectMapper.readValue(new ClassPathResource("files/Text.json").getInputStream(), Text.class);

    documentIndexService.indexDocument("template1", "dossierId1", "fileId1", indexedFilename, documentText, "UserId", false, false, WorkflowStatus.NEW, Map.of("F1Key", "F1Value"));

    SearchResult searchResult = searchService.search(numberOnly, null, List.of("dossierId1"), null, null, false, false, null, null, 0, 10, true);

    assertThat(searchResult.getMatchedDocuments().size()).isEqualTo(1);
    assertThat(searchResult.getMatchedDocuments().get(0).getMatchedTerms().size()).isGreaterThan(0);
}
/*
* Filename contains only one uppercase word and searchString contains complete filename without ending
*/