測試數據
index 結構
# Create the employees index with an explicit mapping.
# "job" is a text field with a "keyword" sub-field (job.keyword).
PUT /employees/
{
  "mappings": {
    "properties": {
      "age":    { "type": "integer" },
      "gender": { "type": "keyword" },
      "job": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 50 }
        }
      },
      "name":   { "type": "keyword" },
      "salary": { "type": "integer" }
    }
  }
}
插入20條數據
# Bulk-index 20 sample employee documents with explicit _id values 1-20.
# Each action line is followed by its document on the next line; keep one JSON object per line.
PUT /employees/_bulk
{ "index": { "_id": "1" } }
{ "name": "Emma", "age": 32, "job": "Product Manager", "gender": "female", "salary": 35000 }
{ "index": { "_id": "2" } }
{ "name": "Underwood", "age": 41, "job": "Dev Manager", "gender": "male", "salary": 50000 }
{ "index": { "_id": "3" } }
{ "name": "Tran", "age": 25, "job": "Web Designer", "gender": "male", "salary": 18000 }
{ "index": { "_id": "4" } }
{ "name": "Rivera", "age": 26, "job": "Web Designer", "gender": "female", "salary": 22000 }
{ "index": { "_id": "5" } }
{ "name": "Rose", "age": 25, "job": "QA", "gender": "female", "salary": 18000 }
{ "index": { "_id": "6" } }
{ "name": "Lucy", "age": 31, "job": "QA", "gender": "female", "salary": 25000 }
{ "index": { "_id": "7" } }
{ "name": "Byrd", "age": 27, "job": "QA", "gender": "male", "salary": 20000 }
{ "index": { "_id": "8" } }
{ "name": "Foster", "age": 27, "job": "Java Programmer", "gender": "male", "salary": 20000 }
{ "index": { "_id": "9" } }
{ "name": "Gregory", "age": 32, "job": "Java Programmer", "gender": "male", "salary": 22000 }
{ "index": { "_id": "10" } }
{ "name": "Bryant", "age": 20, "job": "Java Programmer", "gender": "male", "salary": 9000 }
{ "index": { "_id": "11" } }
{ "name": "Jenny", "age": 36, "job": "Java Programmer", "gender": "female", "salary": 38000 }
{ "index": { "_id": "12" } }
{ "name": "Mcdonald", "age": 31, "job": "Java Programmer", "gender": "male", "salary": 32000 }
{ "index": { "_id": "13" } }
{ "name": "Jonthna", "age": 30, "job": "Java Programmer", "gender": "female", "salary": 30000 }
{ "index": { "_id": "14" } }
{ "name": "Marshall", "age": 32, "job": "Javascript Programmer", "gender": "male", "salary": 25000 }
{ "index": { "_id": "15" } }
{ "name": "King", "age": 33, "job": "Java Programmer", "gender": "male", "salary": 28000 }
{ "index": { "_id": "16" } }
{ "name": "Mccarthy", "age": 21, "job": "Javascript Programmer", "gender": "male", "salary": 16000 }
{ "index": { "_id": "17" } }
{ "name": "Goodwin", "age": 25, "job": "Javascript Programmer", "gender": "male", "salary": 16000 }
{ "index": { "_id": "18" } }
{ "name": "Catherine", "age": 29, "job": "Javascript Programmer", "gender": "female", "salary": 20000 }
{ "index": { "_id": "19" } }
{ "name": "Boone", "age": 30, "job": "DBA", "gender": "male", "salary": 30000 }
{ "index": { "_id": "20" } }
{ "name": "Kathy", "age": 29, "job": "DBA", "gender": "female", "salary": 20000 }
query
ES聚合分析的默認作用範圍是query的查詢結果集,也就是說,會先執行query,聚合再在query返回的結果集上進行。
# Find employees aged 30 or older, then group the matching employees by job title.
POST /employees/_search
{
  "size": 3,
  "query": {
    "range": { "age": { "gte": 30 } }
  },
  "aggs": {
    "jobs": {
      "terms": { "field": "job.keyword" }
    }
  }
}
返回結果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "Emma",
"age" : 32,
"job" : "Product Manager",
"gender" : "female",
"salary" : 35000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Underwood",
"age" : 41,
"job" : "Dev Manager",
"gender" : "male",
"salary" : 50000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"name" : "Lucy",
"age" : 31,
"job" : "QA",
"gender" : "female",
"salary" : 25000
}
}
]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 5
},
{
"key" : "DBA",
"doc_count" : 1
},
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Javascript Programmer",
"doc_count" : 1
},
{
"key" : "Product Manager",
"doc_count" : 1
},
{
"key" : "QA",
"doc_count" : 1
}
]
}
}
}
filter
如果我們只想對聚合的數據進行篩選而不影響query的結果,或者只想在某一個聚合中進行篩選而不影響其他聚合的結果,該怎麼辦呢?此時我們可以使用filter聚合。
# The query keeps employees aged 30 or older.
# older_person narrows only its own sub-aggregation to age >= 35;
# all_jobs is not filtered further and covers every document matched by the query.
# The range bound is written with the documented "gte" parameter, matching the other requests in this file.
POST /employees/_search
{
  "size": 3,
  "query": {
    "range": { "age": { "gte": 30 } }
  },
  "aggs": {
    "older_person": {
      "filter": {
        "range": { "age": { "gte": 35 } }
      },
      "aggs": {
        "jobs": {
          "terms": { "field": "job.keyword" }
        }
      }
    },
    "all_jobs": {
      "terms": { "field": "job.keyword" }
    }
  }
}
結果如下:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "Emma",
"age" : 32,
"job" : "Product Manager",
"gender" : "female",
"salary" : 35000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Underwood",
"age" : 41,
"job" : "Dev Manager",
"gender" : "male",
"salary" : 50000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"name" : "Lucy",
"age" : 31,
"job" : "QA",
"gender" : "female",
"salary" : 25000
}
}
]
},
"aggregations" : {
"older_person" : {
"doc_count" : 2,
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Java Programmer",
"doc_count" : 1
}
]
}
},
"all_jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 5
},
{
"key" : "DBA",
"doc_count" : 1
},
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Javascript Programmer",
"doc_count" : 1
},
{
"key" : "Product Manager",
"doc_count" : 1
},
{
"key" : "QA",
"doc_count" : 1
}
]
}
}
}
我們可以看到hits部分和上面query例子的結果是一樣的;只有older_person裏面的聚合使用了filter裏面的條件(年齡大於等於35歲),all_jobs的結果則和上面query例子中的jobs聚合一樣。
post_filter
如果你想篩選條件只適用於查詢,不適用於聚合,那該怎麼辦?使用post_filter。
比如,我想在聚合中篩選出年齡小於等於23歲的員工,並且按照職位類別進行分組,同時查詢出年齡小於等於35歲的員工,取前五條(請求中沒有指定sort,返回的是默認順序,這裏恰好與id順序一致)。
# young_person buckets employees aged 23 or younger by job title.
# post_filter (age <= 35) only restricts the returned hits; it is not applied to the aggregation.
POST employees/_search
{
  "size": 5,
  "aggs": {
    "young_person": {
      "filter": {
        "range": { "age": { "lte": 23 } }
      },
      "aggs": {
        "jobs": {
          "terms": { "field": "job.keyword" }
        }
      }
    }
  },
  "post_filter": {
    "range": { "age": { "lte": 35 } }
  }
}
結果如下:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 18,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "Emma",
"age" : 32,
"job" : "Product Manager",
"gender" : "female",
"salary" : 35000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"name" : "Tran",
"age" : 25,
"job" : "Web Designer",
"gender" : "male",
"salary" : 18000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"name" : "Rivera",
"age" : 26,
"job" : "Web Designer",
"gender" : "female",
"salary" : 22000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"name" : "Rose",
"age" : 25,
"job" : "QA",
"gender" : "female",
"salary" : 18000
}
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"name" : "Lucy",
"age" : 31,
"job" : "QA",
"gender" : "female",
"salary" : 25000
}
}
]
},
"aggregations" : {
"young_person" : {
"doc_count" : 2,
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 1
},
{
"key" : "Javascript Programmer",
"doc_count" : 1
}
]
}
}
}
}
這個例子中,post_filter的篩選條件並沒有用到聚合裏面。所以你可以理解爲post_filter和query是一對相反的操作:query先對數據進行篩選,聚合再在篩選後的結果上進行;而post_filter只在聚合計算完成之後過濾返回的hits,和聚合相互獨立、互不影響。
global
最後一個是global,可以無視query的影響,比如我們想篩選出大於等於35歲的員工,並且按照職業類型分組,然後我們還想獲得所有員工的平均工資。
# The query limits the jobs aggregation to employees aged 35 or older.
# The "all" global aggregation ignores the query, so salary_avg is computed over every document in the index.
POST /employees/_search
{
  "size": 0,
  "query": {
    "range": { "age": { "gte": 35 } }
  },
  "aggs": {
    "jobs": {
      "terms": { "field": "job.keyword" }
    },
    "all": {
      "global": {},
      "aggs": {
        "salary_avg": {
          "avg": { "field": "salary" }
        }
      }
    }
  }
}
結果如下:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"all" : {
"doc_count" : 20,
"salary_avg" : {
"value" : 24700.0
}
},
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Java Programmer",
"doc_count" : 1
}
]
}
}
}
上面我們可以看到,24700.0是所有員工的平均工資,然後jobs裏面是大於等於35歲的員工的類別,分別是Dev Manager和Java Programmer。
當然要想實現上面的需求,我們也可以有其他寫法,這裏僅僅是爲了展示global的用法。
比如下面的代碼
# Alternative without a query or a global aggregation:
# the age >= 35 restriction lives inside the old_persons filter aggregation,
# while avg_salary runs over all documents because no query narrows the search.
POST /employees/_search
{
  "size": 0,
  "aggs": {
    "old_persons": {
      "filter": {
        "range": { "age": { "gte": 35 } }
      },
      "aggs": {
        "jobs": {
          "terms": { "field": "job.keyword" }
        }
      }
    },
    "avg_salary": {
      "avg": { "field": "salary" }
    }
  }
}
結果如下:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"old_persons" : {
"doc_count" : 2,
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Java Programmer",
"doc_count" : 1
}
]
}
},
"avg_salary" : {
"value" : 24700.0
}
}
}