Elasticsearch存储桶聚合

存储桶聚合

是一种类似于桶的聚合分析,每个结果放在一个桶里,支持子聚合

邻接矩阵聚合 - adjacency_matrix

  • 5.6的新功能
{
  "size": 0,
  "aggs" : {
    "interactions" : {
      "adjacency_matrix" : {
        "filters" : {
          "grpA" : { "terms" : { "accounts" : ["hillary", "sidney"] }},
          "grpB" : { "terms" : { "accounts" : ["donald", "mitt"] }},
          "grpC" : { "terms" : { "accounts" : ["vladimir", "nigel"] }}
        }
      }
    }
  }
}

子聚合 - _parten

设置mapping,answer和question为父子关系。返回问题和答案的匹配文档。

# 创建关联mapping
{
    "mappings": {
        "answer" : {
            "_parent" : {
                "type" : "question"
            }
        }
    }
}
# 分析
{
  "aggs": {
    "top-tags": {
      "terms": {
        "field": "tags.keyword",
        "size": 10
      },
      "aggs": {
        "to-answers": {
          "children": {
            "type" : "answer" 
          },
          "aggs": {
            "top-names": {
              "terms": {
                "field": "owner.display_name.keyword",
                "size": 10
              }
            }
          }
        }
      }
    }
  }
}

日期直方图聚合分析 - date_histogram

  • interval参数:

type1:year, quarter, month, week, day, hour, minute, second

type2:1.5h, 90m…

{
    "aggs" : {
        "sales_over_time" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            }
        }
    }
}
  • 格式化时间

时间间隔为一月

{
    "aggs" : {
        "sales_over_time" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "1M",
                "format" : "yyyy-MM-dd" 
            }
        }
    }
}
  • 时区问题
{
  "aggs": {
    "by_day": {
      "date_histogram": {
        "field":     "date",
        "interval":  "day",
        "time_zone": "-01:00"
      }
    }
  }
}
  • 时间偏移量
{
  "aggs": {
    "by_day": {
      "date_histogram": {
        "field":     "date",
        "interval":  "day",
        "offset":    "+6h"
      }
    }
  }
}
  • keyed Response

返回值中,将field指定的字段的唯一值,以hash输出为每个桶的key。

{
    "aggs" : {
        "sales_over_time" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "1M",
                "format" : "yyyy-MM-dd",
                "keyed": true
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "sale_date" : {
             "date_histogram" : {
                 "field" : "date",
                 "interval": "year",
                 "missing": "2000/01/01" 
             }
         }
    }
}
  • 使用脚本按照星期几分析
{
    "aggs": {
        "dayOfWeek": {
            "terms": {
                "script": {
                    "lang": "painless",
                    "source": "doc['date'].date.dayOfWeek"
                }
            }
        }
    }
}

日期范围分析 - data_range

  • 日期范围
    now-10M/M:当前日期增加10个月,偏移到月初。to:向上,from:向下。
{
    "aggs": {
        "range": {
            "date_range": {
                "field": "date",
                "format": "MM-yyy",
                "ranges": [
                    { "to": "now-10M/M" }, 
                    { "from": "now-10M/M" } 
                ]
            }
        }
    }
}
符号 说明 类型 栗子
G 时代 text AD
C 世纪(>=0) number 20
Y 年代(>=0) year 1996
x 周年 year 1996
w 一年中的周 number 27
e 一周中的天 number 2
E 星期 text Tuesday; Tue
y year 1996
D 一年中的天 number 189
M month July; Jul; 07
d 一月中的天 number 10
a 半天 text PM
K 半天的第几小时(0~11) number 0
h 半天的时钟 (1~12) number 12
H 小时 number 0
k 一天的时钟(1~24) number 24
m 分钟 number 30
s number 55
S 毫秒 number 978
z 时区 text Pacific Standard Time; PST
Z 时区偏移 zone -0800; -08:00; America/Los_Angeles
转义字符 delimiter ‘’
  • 时区范围聚合分析
{
   "aggs": {
       "range": {
           "date_range": {
               "field": "date",
               "time_zone": "CET",
               "ranges": [
                  { "to": "2016/02/01" }, 
                  { "from": "2016/02/01", "to" : "now/d" },
                  { "from": "now/d" }
              ]
          }
      }
   }
}
  • keyed Response
{
    "aggs": {
        "range": {
            "date_range": {
                "field": "date",
                "format": "MM-yyy",
                "ranges": [
                    { "to": "now-10M/M" },
                    { "from": "now-10M/M" }
                ],
                "keyed": true
            }
        }
    }
}
or 
{
    "aggs": {
        "range": {
            "date_range": {
                "field": "date",
                "format": "MM-yyy",
                "ranges": [
                    { "from": "01-2015",  "to": "03-2015", "key": "quarter_01" },
                    { "from": "03-2015", "to": "06-2015", "key": "quarter_02" }
                ],
                "keyed": true
            }
        }
    }
}

多样化采集聚合分析 - diversified_sampler

{
    "query": {
        "query_string": {
            "query": "tags:elasticsearch"
        }
    },
    "aggs": {
        "my_unbiased_sample": {
            "diversified_sampler": {
                "shard_size": 200,
                "field" : "author"
            },
            "aggs": {
                "keywords": {
                    "significant_terms": {
                        "field": "tags",
                        "exclude": ["elasticsearch"]
                    }
                }
            }
        }
    }
}

单过滤聚合 - filter

{
    "aggs" : {
        "t_shirts" : {
            "filter" : { "term": { "type": "t-shirt" } },
            "aggs" : {
                "avg_price" : { "avg" : { "field" : "price" } }
            }
        }
    }
}

多过滤聚合 - filters

{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "filters" : {
          "errors" :   { "match" : { "body" : "error"   }},
          "warnings" : { "match" : { "body" : "warning" }}
        }
      }
    }
  }
}
  • 匿名过滤聚合
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "filters" : [
          { "match" : { "body" : "error"   }},
          { "match" : { "body" : "warning" }}
        ]
      }
    }
  }
}
  • 其他桶聚合

other_bucket_key为其他桶命名,有此参数,other_bucket参数默认为true,否则使用other_bucket参数设置是否显示其他桶。

{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "other_bucket_key": "other_messages",
        "filters" : {
          "errors" :   { "match" : { "body" : "error"   }},
          "warnings" : { "match" : { "body" : "warning" }}
        }
      }
    }
  }
}

全局聚合 - global

  • 顶级聚合器
{
    "query" : {
        "match" : { "type" : "t-shirt" }
    },
    "aggs" : {
        "all_products" : {
            "global" : {}, 
            "aggs" : { 
                "avg_price" : { "avg" : { "field" : "price" } }
            }
        },
        "t_shirts": { "avg" : { "field" : "price" } }
    }
}

IP范围聚合 - ip_range

{
    "aggs" : {
        "ip_ranges" : {
            "ip_range" : {
                "field" : "ip",
                "ranges" : [
                    { "to" : "10.0.0.5" },
                    { "from" : "10.0.0.5" }
                ]
            }
        }
    }
}
  • CIDR掩码
{
    "aggs" : {
        "ip_ranges" : {
            "ip_range" : {
                "field" : "ip",
                "ranges" : [
                    { "mask" : "10.0.0.0/25" },
                    { "mask" : "10.0.0.127/25" }
                ]
            }
        }
    }
}
  • keyed Response
{
    "aggs": {
        "ip_ranges": {
            "ip_range": {
                "field": "remote_ip",
                "ranges": [
                    { "to" : "10.0.0.5" },
                    { "from" : "10.0.0.5" }
                ],
                "keyed": true
            }
        }
    }
}

# 指定key
{
    "aggs": {
        "ip_ranges": {
            "ip_range": {
                "field": "remote_ip",
                "ranges": [
                    { "key": "infinity", "to" : "10.0.0.5" },
                    { "key": "and-beyond", "from" : "10.0.0.5" }
                ],
                "keyed": true
            }
        }
    }
}

嵌套聚合 - resellers

{
    "product" : {
        "properties" : {
            "resellers" : { 
                "type" : "nested",
                "properties" : {
                    "name" : { "type" : "text" },
                    "price" : { "type" : "double" }
                }
            }
        }
    }
}

  • 或者
{
    "query" : {
        "match" : { "name" : "led tv" }
    },
    "aggs" : {
        "resellers" : {
            "nested" : {
                "path" : "resellers"
            },
            "aggs" : {
                "min_price" : { "min" : { "field" : "resellers.price" } }
            }
        }
    }
}

范围聚合 - range

{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "ranges" : [
                    { "to" : 50 },
                    { "from" : 50, "to" : 100 },
                    { "from" : 100 }
                ]
            }
        }
    }
}
  • keyed Response
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "keyed" : true,
                "ranges" : [
                    { "to" : 50 },
                    { "from" : 50, "to" : 100 },
                    { "from" : 100 }
                ]
            }
        }
    }
}
# 或者
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "keyed" : true,
                "ranges" : [
                    { "key" : "cheap", "to" : 50 },
                    { "key" : "average", "from" : 50, "to" : 100 },
                    { "key" : "expensive", "from" : 100 }
                ]
            }
        }
    }
}
  • 脚本
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "script" : {
                    "lang": "painless",
                    "source": "doc['price'].value"
                },
                "ranges" : [
                    { "to" : 50 },
                    { "from" : 50, "to" : 100 },
                    { "from" : 100 }
                ]
            }
        }
    }
}
  • 值脚本
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "script" : {
                    "lang": "painless",
                    "source": "_value * params.conversion_rate",
                    "params" : {
                        "conversion_rate" : 0.8
                    }
                },
                "ranges" : [
                    { "to" : 35 },
                    { "from" : 35, "to" : 70 },
                    { "from" : 70 }
                ]
            }
        }
    }
}
  • 子聚合
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "ranges" : [
                    { "to" : 50 },
                    { "from" : 50, "to" : 100 },
                    { "from" : 100 }
                ]
            },
            "aggs" : {
                "price_stats" : {
                    "stats" : { "field" : "price" }
                }
            }
        }
    }
}
# 或者,基于父聚合的子聚合可以省略stats
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "ranges" : [
                    { "to" : 50 },
                    { "from" : 50, "to" : 100 },
                    { "from" : 100 }
                ]
            },
            "aggs" : {
                "price_stats" : {
                    "stats" : {} 
                }
            }
        }
    }
}

反向嵌套聚合 - reverse_nested

该嵌套聚合,必须中nested之下才可使用。

{
    "issue" : {
        "properties" : {
            "tags" : { "type" : "text" },
            "comments" : {   # 出现在反向嵌套语句的nested的path参数中
                "type" : "nested",
                "properties" : {
                    "username" : { "type" : "keyword" },
                    "comment" : { "type" : "text" }
                }
            }
        }
    }
}
# 反向嵌套
{
  "query": {
    "match": {
      "name": "led tv"
    }
  },
  "aggs": {
    "comments": {
      "nested": {
        "path": "comments"
      },
      "aggs": {
        "top_usernames": {
          "terms": {
            "field": "comments.username"
          },
          "aggs": {
            "comment_to_issue": {
              "reverse_nested": {}, 
              "aggs": {
                "top_tags_per_comment": {
                  "terms": {
                    "field": "tags"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章