如何按匹配结果排序,并优先排列匹配到最左边单词的文档

说明

按匹配到的单词对前缀查询的结果进行排序,但优先排列匹配位置最靠左的单词。

我做过的测试

数据

DELETE /test
PUT /test

PUT /test/person/_mapping
{
  "properties": {
    "name": {
      "type": "multi_field",
      "fields": {
        "name": {"type": "string"},
        "original": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  }
}

PUT /test/person/1
{"name": "Berta Kassulke"}

PUT /test/person/2
{"name": "Kaley Bartoletti"}

PUT /test/person/3
{"name": "Kali Hahn"}

PUT /test/person/4
{"name": "Karolann Klein"}

PUT /test/person/5
{"name": "Sofia Mandez Kaloo"}



简单查询

查询
POST /test/person/_search
{
  "query": {
    "prefix": {"name": {"value": "ka"}}
  }
}

结果
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 1,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "4",
        "_score": 1,
        "_source": {
          "name": "Karolann Klein"
        }
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "5",
        "_score": 1,
        "_source": {
          "name": "Sofia Mandez Kaloo"
        }
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": 1,
        "_source": {
          "name": "Berta Kassulke"
        }
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "2",
        "_score": 1,
        "_source": {
          "name": "Kaley Bartoletti"
        }
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "3",
        "_score": 1,
        "_source": {
          "name": "Kali Hahn"
        }
      }
    ]
  }
}

使用排序

请求
POST /test/person/_search
{
  "query": {
    "prefix": {"name": {"value": "ka"}}
  },
  "sort": {"name": {"order": "asc"}}
}

结果
{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": null,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "2",
        "_score": null,
        "_source": {
          "name": "Kaley Bartoletti"
        },
        "sort": [
          "bartoletti"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": null,
        "_source": {
          "name": "Berta Kassulke"
        },
        "sort": [
          "berta"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "3",
        "_score": null,
        "_source": {
          "name": "Kali Hahn"
        },
        "sort": [
          "hahn"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "5",
        "_score": null,
        "_source": {
           "name": "Sofia Mandez Kaloo"
        },
        "sort": [
          "kaloo"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "4",
        "_score": null,
        "_source": {
          "name": "Karolann Klein"
        },
        "sort": [
          "karolann"
        ]
      }
    ]
  }
}

使用原始值排序

查询
POST /test/person/_search
{
  "query": {
    "prefix": {"name": {"value": "ka"}}
  },
  "sort": {"name.original": {"order": "asc"}}
}

结果
{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": null,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": null,
        "_source": {
          "name": "Berta Kassulke"
        },
        "sort": [
          "Berta Kassulke"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "2",
        "_score": null,
        "_source": {
          "name": "Kaley Bartoletti"
        },
        "sort": [
          "Kaley Bartoletti"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "3",
        "_score": null,
        "_source": {
          "name": "Kali Hahn"
        },
        "sort": [
          "Kali Hahn"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "4",
        "_score": null,
        "_source": {
          "name": "Karolann Klein"
        },
        "sort": [
          "Karolann Klein"
        ]
      },
      {
        "_index": "test",
        "_type": "person",
        "_id": "5",
        "_score": null,
        "_source": {
           "name": "Sofia Mandez Kaloo"
        },
        "sort": [
          "Sofia Mandez Kaloo"
        ]
      }
    ]
  }
}

预期结果

按名称升序(ASC)排序,但优先排列匹配位置最靠左的单词:
  • Kaley Bartoletti
  • Kali Hahn
  • Karolann Klein
  • Berta Kassulke
  • Sofia Mandez Kaloo
最佳答案

    好问题。实现此目的的一种方法是将 edge ngram filter 与 span first query 组合使用。

    这是我的设定

    {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_custom_analyzer": {
                        "tokenizer": "standard",
                        "filter": ["lowercase",
                            "edge_filter",
                            "asciifolding"
                        ]
                    }
                },
                "filter": {
                    "edge_filter": {
                        "type": "edgeNGram",
                        "min_gram": 2,
                        "max_gram": 8
                    }
    
                }
    
            }
        },
        "mappings": {
            "person": {
                "properties": {
                    "name": {
                        "type": "string",
                        "analyzer": "my_custom_analyzer",
                        "search_analyzer": "standard",
                        "fields": {
                            "standard": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
    
        }
    }
    

    之后,我插入了您的样本文档。然后,我使用 dis_max 编写了以下查询。请注意,第一个 span_first 查询的 end 参数为 1,因此最左边的匹配项会获得更高的优先级(更高的分数)。我首先按 _score 排序,然后按 name 排序。
    {
      "query": {
        "dis_max": {
          "tie_breaker": 0.7,
          "boost": 1.2,
          "queries": [
            {
              "match": {
                "name": "ka"
              }
            },
            {
              "span_first": {
                "match": {
                  "span_term": {
                    "name": "ka"
                  }
                },
                "end": 1
              }
            },
            {
              "span_first": {
                "match": {
                  "span_term": {
                    "name": "ka"
                  }
                },
                "end": 2
              }
            }
          ]
        }
      },
      "sort": [
        {
          "_score": {
            "order": "desc"
          }
        },
        {
          "name.standard": {
            "order": "asc"
          }
        }
      ]
    }
    

    我得到的结果
    "hits": [
             {
                "_index": "esedge",
                "_type": "policy_data",
                "_id": "2",
                "_score": 0.72272325,
                "_source": {
                   "name": "Kaley Bartoletti"
                },
                "sort": [
                   0.72272325,
                   "bartoletti"
                ]
             },
             {
                "_index": "esedge",
                "_type": "policy_data",
                "_id": "3",
                "_score": 0.72272325,
                "_source": {
                   "name": "Kali Hahn"
                },
                "sort": [
                   0.72272325,
                   "hahn"
                ]
             },
             {
                "_index": "esedge",
                "_type": "policy_data",
                "_id": "4",
                "_score": 0.72272325,
                "_source": {
                   "name": "Karolann Klein"
                },
                "sort": [
                   0.72272325,
                   "karolann"
                ]
             },
             {
                "_index": "esedge",
                "_type": "policy_data",
                "_id": "1",
                "_score": 0.54295504,
                "_source": {
                   "name": "Berta Kassulke"
                },
                "sort": [
                   0.54295504,
                   "berta"
                ]
             },
             {
                "_index": "esedge",
                "_type": "policy_data",
                "_id": "5",
                "_score": 0.2905494,
                "_source": {
                   "name": "Sofia Mandez Kaloo"
                },
                "sort": [
                   0.2905494,
                   "kaloo"
                ]
             }
          ]
    

    我希望这有帮助。

    10-07 12:50
    查看更多