我有一个驼峰式(camel case)的主题标签,例如 #teamIndia。现在,单击此主题标签后,应当获取所有包含“#teamIndia”的结果:首先显示包含“#teamIndia”的结果,然后是包含“teamIndia”的结果,再然后是包含“team India”的结果,最后是包含“team”或“India”的结果,依此类推。
我在做什么:
搜索文字:
“#teamIndia”,“#NEWYORK”,“#profession”,“#2016”
POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}
文档示例:-
POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! <a href="search/clips?q=%23theProvider&source=hashtag" ng-click="handleModalClick()"> #theProvider </a> rioOlympic <a href="search/clips?q=%23DUBAI&source=hashtag" ng-click="handleModalClick()"> #DUBAI </a> <a href="search/clips?q=%23TheBestAndTheBeast&source=hashtag" ng-click="handleModalClick()"> #TheBestAndTheBeast </a> <a href="search/clips?q=%23rioOlympic&source=hashtag" ng-click="handleModalClick()"> #rioOlympic </a>"}
**搜索查询**
GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}
}
预期(expected)结果(按以下顺序排列):
“#teamIndia”,
“teamIndia”,
“team India”,
“team”,
“India”,
其他测试关键字同理。
最佳答案
原帖中的查询无法按预期工作的原因之一,是 description.raw 被设置为 not_analyzed。
因此,查询 #teamIndia 永远不会匹配 description 为 "Animals and Pets and #teamIndia" 的文档,因为 description.raw 中存储的是未经分析的完整词项 Animals and Pets and #teamIndia,而不是 #teamIndia。
假设您的文档类似于原帖(OP)中的第二个示例,例如:
{"id" : 2, "description" : "Animals and Pets and #teamIndia"}
或者
{"id":7,"description":"This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"}
然后,您应该能够按照以下顺序对文档进行排名:
1)包含“#teamIndia”的描述
2)包含“teamIndia”的描述
3)包含“team India”的描述
4)包含“India”的描述
方法是在 word_delimiter 过滤器中启用 preserve_original 和 catenate_words(下方示例配置中实际使用的是 catenate_all),如下例所示:
索引文档
PUT clip
{
"settings": {
"analysis": {
"char_filter": {
"zwsp_normalize": {
"type": "mapping",
"mappings": [
"\\u200B=>",
"\\u200C=>",
"\\u200D=>"
]
},
"html_decoder": {
"type": "mapping",
"mappings": [
"&lt;=> <",
"&gt;=> >"
]
}
},
"filter": {
"camelcase": {
"type": "word_delimiter",
"preserve_original": "true",
"catenate_all": "true"
},
"stopword": {
"type": "stop",
"stopwords": [
"and",
"is",
"the"
]
}
},
"analyzer": {
"camelcase_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
],
"char_filter": [
"zwsp_normalize",
"html_decoder",
"html_strip"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer",
"norms": {
"enabled": false
}
}
}
}
}
}
}
}
POST /clip/Clip/1
{
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
POST /clip/Clip/2
{
"id": 2,
"description": "Animals and Pets and teamIndia"
}
POST /clip/Clip/3
{
"id": 3,
"description": "Animals and Pets and team India"
}
POST /clip/Clip/4
{
"id": 4,
"description": "Animals and Pets and India"
}
POST /clip/Clip/7
{
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
查询:
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#teamIndia"
}
}
]
}
}
}
}
}
结果:
"hits": {
"total": 5,
"max_score": 1.4969246,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 1.4969246,
"_source": {
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "1",
"_score": 1.4969246,
"_source": {
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "2",
"_score": 1.0952718,
"_source": {
"id": 2,
"description": "Animals and Pets and teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "3",
"_score": 0.5207714,
"_source": {
"id": 3,
"description": "Animals and Pets and team India"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "4",
"_score": 0.11123338,
"_source": {
"id": 4,
"description": "Animals and Pets and India"
}
}
]
}
#dubai示例:
POST /clip/Clip/5
{
"id": 5,
"description": "#dubai is hot"
}
POST /clip/Clip/6
{
"id": 6,
"description": "dubai airport is huge"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#dubai"
}
}
]
}
}
}
}
}
"hits": {
"total": 2,
"max_score": 1.820827,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "5",
"_score": 1.820827,
"_source": {
"id": 5,
"description": "#dubai is hot"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "6",
"_score": 0.5856731,
"_source": {
"id": 6,
"description": "dubai airport is huge"
}
}
]
}
示例#professionalAndPunctual:
POST /clip/Clip/7
{
"id": 7,
"description": "professionalAndPunctual"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#professionalAndPunctual"
}
}
]
}
}
}
}
}
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 2.2149992,
"_source": {
"id": 7,
"description": "professionalAndPunctual"
}
}
]
编辑示例
示例:#TheBestAndTheBeast
POST /clip/Clip/10
{"id" : 10, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/11
{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/12
{"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#TheBestAndTheBeast"
}
}
]
}
}
}
}
}
#结果
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "12",
"_score": 1.8701664,
"_source": {
"id": 12,
"description": "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "10",
"_score": 0.9263139,
"_source": {
"id": 10,
"description": "TheBestAndTheBeast"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "11",
"_score": 0.9263139,
"_source": {
"id": 11,
"description": "bikes in DUBAI TheBestAndTheBeast profession"
}
}
]
分析器示例:
get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI
{
"tokens": [
{
"token": "#dubai",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "dubai",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 0
}
]
}
get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 0
},
{
"token": "#teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 1
},
{
"token": "india",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "team",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
}
]
}
关于Elasticsearch : search results on clicking on Hashtag,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/39345299/