问题描述
我使用 n-gram 分词器(tokenizer)在 Elasticsearch 中实现了一个自动建议器(auto-suggester)。现在我想在自动建议列表中高亮显示用户输入的字符序列。为此,我使用了 Elasticsearch 提供的高亮器(highlighter),代码如下;但在输出中,被高亮的却是整个词条。请问我哪里做错了?
{
  "query": {
    "query_string": {
      "query": "soft",
      "default_field": "competency_display_name"
    }
  },
  "highlight": {
    "pre_tags": ["<b>"],
    "post_tags": ["</b>"],
    "fields": {
      "competency_display_name": {}
    }
  }
}
结果是:
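{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "competency_auto_suggest",
        "_type": "competency",
        "_id": "4",
        "_score": 1,
        "_source": {
          "review": null,
          "competency_title": "Software Development",
          "id": 4,
          "competency_display_name": "Software Development"
        },
        "highlight": {
          "competency_display_name": [
            "<b>Software Development</b>"
          ]
        }
      }
    ]
  }
}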
映射
"competency": {
  "properties": {
    "competency_display_name": {
      "type": "string",
      "index_analyzer": "index_ngram_analyzer",
      "search_analyzer": "search_term_analyzer"
    }
  }
}
设置
"analysis": {
  "filter": {
    "ngram_tokenizer": {
      "type": "nGram",
      "min_gram": "1",
      "max_gram": "15",
      "token_chars": ["letter", "digit"]
    }
  },
  "analyzer": {
    "index_ngram_analyzer": {
      "type": "custom",
      "tokenizer": "keyword",
      "filter": ["ngram_tokenizer", "lowercase"]
    },
    "search_term_analyzer": {
      "type": "custom",
      "tokenizer": "keyword",
      "filter": "lowercase"
    }
  }
}
如何只高亮显示 Soft,而不是整个 Software Development?
解决方案
在这种情况下,要实现部分高亮,应该使用 ngram tokenizer,而不是 ngram filter。
另外,需要将 term_vector 设置为 with_positions_offsets,以加快高亮速度。
下面是可行的设置和映射:
"analysis": {
  "tokenizer": {
    "ngram_tokenizer": {
      "type": "nGram",
      "min_gram": "1",
      "max_gram": "15",
      "token_chars": ["letter", "digit"]
    }
  },
  "analyzer": {
    "index_ngram_analyzer": {
      "type": "custom",
      "tokenizer": "ngram_tokenizer",
      "filter": ["lowercase"]
    },
    "search_term_analyzer": {
      "type": "custom",
      "tokenizer": "keyword",
      "filter": "lowercase"
    }
  }
}
映射
"competency": {
  "properties": {
    "competency_display_name": {
      "type": "string",
      "index_analyzer": "index_ngram_analyzer",
      "search_analyzer": "search_term_analyzer",
      "term_vector": "with_positions_offsets"
    }
  }
}
I have made an auto-suggester in Elasticsearch using an n-gram tokenizer. Now I want to highlight the user-entered character sequence in the auto-suggest list. For this purpose I used the highlighter available in Elasticsearch; my code is below, but in the output the complete term is being highlighted. Where am I going wrong?
{ "query": { "query_string": { "query": "soft", "default_field": "competency_display_name" } }, "highlight": { "pre_tags": ["<b>"], "post_tags": ["</b>"], "fields": { "competency_display_name": {} } } }
and the result is
{ "took": 8, "timed_out": false, "_shards": { "total": 5, "successful": 5, "failed": 0 }, "hits": { "total": 1, "max_score": 1, "hits": [ { "_index": "competency_auto_suggest", "_type": "competency", "_id": "4", "_score": 1, "_source": { "review": null, "competency_title": "Software Development", "id": 4, "competency_display_name": "Software Development" }, "highlight": { "competency_display_name": [ "<b>Software Development</b>" ] } } ] } }
mapping
"competency":{ "properties": { "competency_display_name":{ "type":"string", "index_analyzer": "index_ngram_analyzer", "search_analyzer": "search_term_analyzer" } } }
settings
"analysis": { "filter": { "ngram_tokenizer": { "type": "nGram", "min_gram": "1", "max_gram": "15", "token_chars": [ "letter", "digit" ] } }, "analyzer": { "index_ngram_analyzer": { "type": "custom", "tokenizer": "keyword", "filter": [ "ngram_tokenizer", "lowercase" ] }, "search_term_analyzer": { "type": "custom", "tokenizer": "keyword", "filter": "lowercase" } } }
how to highlight Soft instead of Software Development.
解决方案
You should use an ngram tokenizer instead of an ngram filter to highlight in this case.
Setting term_vector to with_positions_offsets is also needed to make highlighting faster. Here are the working settings & mapping:
"analysis": { "tokenizer": { "ngram_tokenizer": { "type": "nGram", "min_gram": "1", "max_gram": "15", "token_chars": [ "letter", "digit" ] } }, "analyzer": { "index_ngram_analyzer": { "type": "custom", "tokenizer": "ngram_tokenizer", "filter": [ "lowercase" ] }, "search_term_analyzer": { "type": "custom", "tokenizer": "keyword", "filter": "lowercase" } } }
mapping
"competency":{ "properties": { "competency_display_name":{ "type":"string", "index_analyzer": "index_ngram_analyzer", "search_analyzer": "search_term_analyzer", "term_vector":"with_positions_offsets" } } }
这篇关于在弹性搜索中强调词的一部分的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!