我正在尝试找到使用嵌套 function_score 进行评分的正确方法。

数据:

PUT test

PUT test/test/_mapping
{
   "properties": {
      "driver_id": {
         "type": "integer"
      },
      "driver_name": {
         "type": "string"
      },
      "cities": {
         "type": "nested",
         "properties": {
            "city_id": {
               "type": "integer"
            },
            "used": {
               "type": "float"
            }
         }
      },
      "cars": {
         "type": "nested",
         "properties": {
            "car_id": {
               "type": "integer"
            },
            "used": {
               "type": "float"
            }
         }
      }
   }
}


PUT test/test/1
{
  "id":1,
  "driver_name":"Lady Smith",
  "cars":[{"car_id":1,"brand":"Ford Focus","used":0.5},{"car_id":2,"brand":"Toyota Corola","used":0.5}],
  "city":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/2
{
  "id":2,
  "driver_name":"John Smith",
  "cars":[{"car_id":1,"brand":"Ford Focus","used":0.3},{"car_id":2,"brand":"Toyota Corola","used":0.3}],
  "city":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/3
{
  "id":3,
  "driver_name":"Will Smith",
  "cars":[{"car_id":1,"brand":"Ford Focus","used":0.1}],
  "city":[{"city_id":3,"name":"New York","used":0.2}]
}
PUT test/test/4
{
  "id":4,
  "driver_name":"Ash Smith",
  "cars":[],
  "city":[]
}

简而言之,给定上述数据,我希望查询出最适合在特拉维夫驾驶福特福克斯和丰田卡罗拉的司机。

或者,松散地转换为SQL:
-- Per-driver score: (sum of `used` over car ids 1 and 2, divided by 2)
-- multiplied by (sum of `used` over city id 3, divided by 1).
-- Each derived table must expose driver_id so the outer ON clauses can
-- reference it; drivers without a matching row get a NULL driver_score
-- because of the LEFT JOINs.
SELECT drv.driver_id,
   cr.cars_score * ct.city_score  AS driver_score
FROM drivers drv
LEFT JOIN (SELECT driver_id,
                  sum(used) / 2 as cars_score
           FROM car_usage
           WHERE car_id IN (1,2) GROUP BY driver_id) AS cr
  ON (cr.driver_id = drv.driver_id)
LEFT JOIN (SELECT driver_id,
                  sum(used) / 1 as city_score
           FROM city_usage
           WHERE city_id IN (3) GROUP BY driver_id) AS ct
  ON (ct.driver_id = drv.driver_id)

尝试了以下内容:
{
  "query": {
    "bool": {
      "disable_coord": true,
      "must": [
        {
          "query": {
            "bool": {
              "disable_coord": true,
              "must": [{
                  "function_score": {
                    "query": {
                      "nested": {
                        "path": "cars",
                        "query": {
                          "function_score": {
                            "filter": {
                              "term": {
                                "cars.car_id": 1
                              }
                            },
                            "boost_mode": "replace",
                            "score_mode": "sum",
                            "functions": [
                              {
                                "field_value_factor": {
                                  "field": "cars.used",
                                  "factor":0.5,
                                  "missing": 0
                                }
                              }
                            ]
                          }
                        }
                      }
                    }
                  }
                }, {
                  "function_score": {
                    "query": {
                      "nested": {
                        "path": "cars",
                        "query": {
                          "function_score": {
                            "filter": {
                              "term": {
                                "cars.car_id": 2
                              }
                            },
                            "boost_mode": "replace",
                            "score_mode": "sum",
                            "functions": [
                              {
                                "field_value_factor": {
                                  "field": "cars.used",
                                  "factor":0.5,
                                  "missing": 0
                                }
                              }
                            ]
                          }
                        }
                      }
                    }
                  }
                }
              ]
            }
          }
        },
        {
          "function_score": {
            "query": {
              "nested": {
                "path": "cities",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cities.city_id": 3
                      }
                    },
                    "score_mode": "multiply",
                    "boost_mode": "replace",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cities.used",
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            }
          }
        }
      ]
    }
  }
}

这给了我奇怪的结果。

然后尝试:
{
    "query": {
        "bool": {
          "must": [
            {
              "nested": {
                "path": "cars",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cars.car_id": 1
                      }
                    },
                    "score_mode": "sum",
                    "boost_mode":"replace",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cars.used",
                          "factor":0.5,
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "cars",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cars.car_id": 2
                      }
                    },
                    "score_mode": "sum",
                    "boost_mode":"replace",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cars.used",
                          "factor":0.5,
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "cities",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cities.city_id": 3
                      }
                    },
                    "score_mode":"multiply",
                    "boost_mode":"replace",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cities.used",
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
    }
}

这次更接近了,但结果似乎只是所有分数的总和。

我的一位朋友建议把整个JSON扁平化,去掉嵌套对象(把它们变成普通属性),但是我不确定这样是否会让查询数据更容易。

更新1

另一个失败的尝试:
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "nested": {
                "path": "cars",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cars.car_id": 1
                      }
                    },
                    "score_mode": "sum",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cars.used",
                          "factor": 0.5,
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "cars",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cars.car_id": 2
                      }
                    },
                    "score_mode": "sum",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cars.used",
                          "factor": 0.5,
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "cities",
                "query": {
                  "function_score": {
                    "filter": {
                      "term": {
                        "cities.city_id": 3
                      }
                    },
                    "score_mode": "multiply",
                    "functions": [
                      {
                        "field_value_factor": {
                          "field": "cities.used",
                          "missing": 0
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      "score_mode": "multiply"
    }
  }
}

更新2

按照扁平化字段、去掉嵌套过滤器的替代方案,我最终得到了以下内容:
PUT test2

PUT test2/test2/1
{
  "id":1,
  "driver_name":"Lady Smith",
  "cars_1":{"brand":"Ford Focus","used":0.5},
  "cars_2":{"brand":"Toyota Corola","used":0.5},
  "cities_3":{"name":"Tel Aviv","used":0.8},
  "cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
  "id":2,
  "driver_name":"John Smith",
  "cars_1":{"brand":"Ford Focus","used":0.3},
  "cars_2":{"brand":"Toyota Corola","used":0.3},
  "cities_3":{"name":"Tel Aviv","used":0.8},
  "cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
  "id":3,
  "driver_name":"Will Smith",
  "cars_1":{"brand":"Ford Focus","used":0.1},
  "cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
  "id":4,
  "driver_name":"Ash Smith"
}

post test2/_search
{
   "query": {
      "bool": {
         "disable_coord": true,
         "must": [
            {
               "match": {
                  "name": "red pepper"
               }
            }
         ],
         "should": [
            {
               "nested": {
                  "path": "words",
                  "query": {
                     "function_score": {
                        "functions": [
                           {
                              "field_value_factor": {
                                "field" : "words.weight",
                                "missing": 0
                              }
                           }
                        ],
                        "query": {
                           "match": {
                              "words.text": "red pepper"
                           }
                        },
                        "score_mode": "sum",
                        "boost_mode": "replace"
                     }
                  },
                  "score_mode": "total"
               }
            }
         ]
      }
   }
}

GET test2/_search
{
  "query": {
    "function_score": {
      "query":{
        "bool":{
          "must":[{
            "exists":{"field":"cars_1"}
          },{
            "exists":{"field":"cars_2"}
        },{
          "exists":{"field":"cities_3"}
        }]
        }
      },
      "score_mode": "multiply",
      "boost_mode": "replace",
      "functions": [{
        "script_score": {
          "script": {
            "inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
          }
        }
      }]
    }
  }
}

但是我不确定内联脚本是否会影响性能。感觉我漏掉了某个更简单的解决方案。

最佳答案

仅供以后访问该帖子的人参考,
我最终更改了数据模型,并使用了script_score(5.0.0中的lang:"painless")

警告:尽管此方法确实满足了我的需求,但对性能有影响,根据粗略估计,响应时间会变慢约3-5倍。

目前,对我来说已经足够了。

PUT test2

PUT test2/test2/1
{
  "id":1,
  "driver_name":"Lady Smith",
  "cars_1":{"brand":"Ford Focus","used":0.5},
  "cars_2":{"brand":"Toyota Corola","used":0.5},
  "cities_3":{"name":"Tel Aviv","used":0.8},
  "cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
  "id":2,
  "driver_name":"John Smith",
  "cars_1":{"brand":"Ford Focus","used":0.3},
  "cars_2":{"brand":"Toyota Corola","used":0.3},
  "cities_3":{"name":"Tel Aviv","used":0.8},
  "cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
  "id":3,
  "driver_name":"Will Smith",
  "cars_1":{"brand":"Ford Focus","used":0.1},
  "cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
  "id":4,
  "driver_name":"Ash Smith"
}

post test2/_search
{
   "query": {
      "bool": {
         "disable_coord": true,
         "must": [
            {
               "match": {
                  "name": "red pepper"
               }
            }
         ],
         "should": [
            {
               "nested": {
                  "path": "words",
                  "query": {
                     "function_score": {
                        "functions": [
                           {
                              "field_value_factor": {
                                "field" : "words.weight",
                                "missing": 0
                              }
                           }
                        ],
                        "query": {
                           "match": {
                              "words.text": "red pepper"
                           }
                        },
                        "score_mode": "sum",
                        "boost_mode": "replace"
                     }
                  },
                  "score_mode": "total"
               }
            }
         ]
      }
   }
}

GET test2/_search
{
  "query": {
    "function_score": {
      "query":{
        "bool":{
          "must":[{
            "exists":{"field":"cars_1"}
          },{
            "exists":{"field":"cars_2"}
        },{
          "exists":{"field":"cities_3"}
        }]
        }
      },
      "score_mode": "multiply",
      "boost_mode": "replace",
      "functions": [{
        "script_score": {
          "script": {
            "inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
          }
        }
      }]
    }
  }
}

关于elasticsearch - 使用嵌套的function_scores进行Elasticsearch评分,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/40359203/

10-15 20:25