我正在尝试找到使用嵌套 function_score 进行评分的正确方法。
数据:
PUT test
PUT test/test/_mapping
{
"properties": {
"driver_id": {
"type": "integer"
},
"driver_name": {
"type": "string"
},
"cities": {
"type": "nested",
"properties": {
"city_id": {
"type": "integer"
},
"used": {
"type": "float"
}
}
},
"cars": {
"type": "nested",
"properties": {
"car_id": {
"type": "integer"
},
"used": {
"type": "float"
}
}
}
}
}
PUT test/test/1
{
"id":1,
"driver_name":"Lady Smith",
"cars":[{"car_id":1,"brand":"Ford Focus","used":0.5},{"car_id":2,"brand":"Toyota Corola","used":0.5}],
"city":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/2
{
"id":2,
"driver_name":"John Smith",
"cars":[{"car_id":1,"brand":"Ford Focus","used":0.3},{"car_id":2,"brand":"Toyota Corola","used":0.3}],
"city":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/3
{
"id":3,
"driver_name":"Will Smith",
"cars":[{"car_id":1,"brand":"Ford Focus","used":0.1}],
"city":[{"city_id":3,"name":"New York","used":0.2}]
}
PUT test/test/4
{
"id":4,
"driver_name":"Ash Smith",
"cars":[],
"city":[]
}
简而言之,给定这些数据,我想查询出最适合在特拉维夫(Tel Aviv)驾驶福特(Ford Focus)和卡罗拉(Toyota Corola)的司机。
或者,粗略地翻译成SQL:
SELECT driver_id,
cr.cars_score * ct.city_score AS driver_score
FROM drivers drv
LEFT JOIN (SELECT sum(used) / 2 as cars_score
FROM car_usage
WHERE car_id IN (1,2) GROUP BY driver_id) AS cr
ON (cr.driver_id = drv.driver_id)
LEFT JOIN (SELECT sum(used) / 1 as city_score
FROM city_usage
WHERE city_id IN (3) GROUP BY driver_id) AS ct
ON (ct.driver_id = drv.driver_id)
尝试了以下内容:
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"query": {
"bool": {
"disable_coord": true,
"must": [{
"function_score": {
"query": {
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"boost_mode": "replace",
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
}
}
}, {
"function_score": {
"query": {
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"boost_mode": "replace",
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
}
}
}
]
}
}
},
{
"function_score": {
"query": {
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
}
}
]
}
}
}
这给了我奇怪的结果。
然后尝试:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"score_mode": "sum",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"score_mode": "sum",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode":"multiply",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
]
}
}
}
这次更接近预期了,但得到的结果似乎只是所有分数的总和。
我的一位朋友建议将整个JSON扁平化,去掉嵌套对象(把它们变成普通属性),但是我不确定这是否会让查询数据变得更容易。
更新1
另一个失败的尝试:
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor": 0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor": 0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode": "multiply",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
]
}
},
"score_mode": "multiply"
}
}
}
更新2
按照扁平化字段并去掉嵌套过滤器的替代方案,我最终得到了以下内容:
PUT test2
PUT test2/test2/1
{
"id":1,
"driver_name":"Lady Smith",
"cars_1":{"brand":"Ford Focus","used":0.5},
"cars_2":{"brand":"Toyota Corola","used":0.5},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
"id":2,
"driver_name":"John Smith",
"cars_1":{"brand":"Ford Focus","used":0.3},
"cars_2":{"brand":"Toyota Corola","used":0.3},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
"id":3,
"driver_name":"Will Smith",
"cars_1":{"brand":"Ford Focus","used":0.1},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
"id":4,
"driver_name":"Ash Smith",
}
post test2/_search
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"match": {
"name": "red pepper"
}
}
],
"should": [
{
"nested": {
"path": "words",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field" : "words.weight",
"missing": 0
}
}
],
"query": {
"match": {
"words.text": "red pepper"
}
},
"score_mode": "sum",
"boost_mode": "replace"
}
},
"score_mode": "total"
}
}
]
}
}
}
GET test2/_search
{
"query": {
"function_score": {
"query":{
"bool":{
"must":[{
"exists":{"field":"cars_1"}
},{
"exists":{"field":"cars_2"}
},{
"exists":{"field":"cities_3"}
}]
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [{
"script_score": {
"script": {
"inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
}
}
}]
}
}
}
但是我不确定内联脚本是否会影响性能。感觉我好像遗漏了一个更简单的解决方案。
最佳答案
仅供以后访问该帖子的人参考,
我最终更改了数据模型,并使用了script_score(在5.0.0中为lang:"painless")。
警告:尽管此方法确实满足了我的需求,但对性能有影响,根据目测估计,响应时间会增加到原来的3-5倍。
目前,对我来说已经足够了。
PUT test2
PUT test2/test2/1
{
"id":1,
"driver_name":"Lady Smith",
"cars_1":{"brand":"Ford Focus","used":0.5},
"cars_2":{"brand":"Toyota Corola","used":0.5},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
"id":2,
"driver_name":"John Smith",
"cars_1":{"brand":"Ford Focus","used":0.3},
"cars_2":{"brand":"Toyota Corola","used":0.3},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
"id":3,
"driver_name":"Will Smith",
"cars_1":{"brand":"Ford Focus","used":0.1},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
"id":4,
"driver_name":"Ash Smith",
}
post test2/_search
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"match": {
"name": "red pepper"
}
}
],
"should": [
{
"nested": {
"path": "words",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field" : "words.weight",
"missing": 0
}
}
],
"query": {
"match": {
"words.text": "red pepper"
}
},
"score_mode": "sum",
"boost_mode": "replace"
}
},
"score_mode": "total"
}
}
]
}
}
}
GET test2/_search
{
"query": {
"function_score": {
"query":{
"bool":{
"must":[{
"exists":{"field":"cars_1"}
},{
"exists":{"field":"cars_2"}
},{
"exists":{"field":"cities_3"}
}]
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [{
"script_score": {
"script": {
"inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
}
}
}]
}
}
}
关于elasticsearch - 使用嵌套的function_scores进行Elasticsearch评分,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/40359203/