Elasticsearch指標聚合分析

指標聚合分析

平均值分析 - avg

  • grade字段的平均值
{
    "size": 0,
    "aggs" : {
        "avg_grade" : { 
            "avg" : { 
                "field" : "grade" 
            }
        }
    }
}
  • 使用腳本
{
    "size": 0,
    "aggs" : {
        "avg_grade" : {
            "avg" : {
                "script" : {
                    "source" : "doc.grade.value"
                }
            }
        }
    }
}
  • 文件腳本(注意:file 腳本已在 Elasticsearch 6.0 中移除,新版本請改用存儲腳本並以 "id" 引用;下文各處的文件腳本示例同理)
{
    "size": 0,
    "aggs" : {
        "avg_grade" : {
            "avg" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "grade"
                    }
                }
            }
        }
    }
}
  • 值腳本(先用腳本對字段的每個值做換算,例如乘以修正係數,再對換算後的值求平均)
{
    "size": 0,
    "aggs" : {
        "avg_corrected_grade" : {
            "avg" : {
                "field" : "grade",
                "script" : {
                    "lang": "painless",
                    "source": "_value * params.correction",
                    "params" : {
                        "correction" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值處理
{
    "size": 0,
    "aggs" : {
        "grade_avg" : {
            "avg" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

去重操作 - cardinality

  • 種類去重
{
    "size": 0,
    "aggs" : {
        "type_count" : {
            "cardinality" : {
                "field" : "type"
            }
        }
    }
}
  • 精準控制去重

precision_threshold 這個參數用內存換取精度:去重後的數量低於該閾值時,結果接近精確;超過該閾值後,結果爲近似值。最大支持40000(設置更大的值等同於40000),默認3000。

{
    "aggs" : {
        "type_count" : {
            "cardinality" : {
                "field" : "type",
                "precision_threshold": 100 
            }
        }
    }
}
  • 腳本
{
    "aggs" : {
        "type_promoted_count" : {
            "cardinality" : {
                "script": {
                    "lang": "painless",
                    "source": "doc['type'].value + ' ' + doc['promoted'].value"
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "type_promoted_count" : {
            "cardinality" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "type_field": "type",
                        "promoted_field": "promoted"
                    }
                }
            }
        }
    }
}
  • 缺失值處理
{
    "aggs" : {
        "tag_cardinality" : {
            "cardinality" : {
                "field" : "tag",
                "missing": "N/A" 
            }
        }
    }
}

聚合指標 - extended_stats

  • 所有指標

包括計數、最值、平均值、總和,以及平方和、方差、標準差和標準差區間等。

{
    "aggs" : {
        "grades_stats" : { 
            "extended_stats" : { 
                "field" : "grade"
            }
        }
    }
}
  • 指定sigma標準差個數
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "field" : "grade",
                "sigma" : 3 
            }
        }
    }
}
  • 腳本
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "script" : {
                    "source" : "doc['grade'].value",
                    "lang" : "painless"
                 }
             }
         }
    }
}
  • 文件腳本
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "grade"
                    }
                }
            }
        }
    }
}
  • 值腳本
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "field" : "grade",
                "script" : {
                    "lang" : "painless",
                    "source": "_value * params.correction",
                    "params" : {
                        "correction" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺省值處理
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "field" : "grade",
                "missing": 0 
            }
        }
    }
}

地理區域座標聚合 - geo_bounds

wrap_longitude 是可選參數,指定邊界框是否允許跨越國際換日線(180°經線),默認爲 true

{
    "aggs" : {
        "viewport" : {
            "geo_bounds" : {
                "field" : "location", 
                "wrap_longitude" : true 
            }
        }
    }
}

地理區域的中心點(質心) - geo_centroid

{
    "aggs" : {
        "centroid" : {
            "geo_centroid" : {
                "field" : "location" 
            }
        }
    }
}
  • 一個栗子

根據city字段,聚合分析location字段

{
    "aggs" : {
        "cities" : {
            "terms" : { "field" : "city.keyword" },
            "aggs" : {
                "centroid" : {
                    "geo_centroid" : { "field" : "location" }
                }
            }
        }
    }
}

最大值 - Max

{
    "aggs" : {
        "max_price" : { "max" : { "field" : "price" } }
    }
}
  • 腳本
{
    "aggs" : {
        "max_price" : {
            "max" : {
                "script" : {
                    "source" : "doc.price.value"
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "max_price" : {
            "max" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "price"
                    }
                }
            }
        }
    }
}
  • 值腳本
{
    "aggs" : {
        "max_price_in_euros" : {
            "max" : {
                "field" : "price",
                "script" : {
                    "source" : "_value * params.conversion_rate",
                    "params" : {
                        "conversion_rate" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺省值處理
{
    "aggs" : {
        "grade_max" : {
            "max" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

最小值 - Min

{
    "aggs" : {
        "min_price" : { "min" : { "field" : "price" } }
    }
}
  • 腳本
{
    "aggs" : {
        "min_price" : {
            "min" : {
                "script" : {
                    "source" : "doc.price.value"
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "min_price" : {
            "min" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "price"
                    }
                }
            }
        }
    }
}
  • 值腳本
{
    "aggs" : {
        "min_price_in_euros" : {
            "min" : {
                "field" : "price",
                "script" : {
                    "source" : "_value * params.conversion_rate",
                    "params" : {
                        "conversion_rate" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺省值處理
{
    "aggs" : {
        "grade_min" : {
            "min" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

百分數聚合分析 - percentiles

  • 對load_time字段進行百分位數分析,返回指定percents各百分位上對應的值(不是文檔數)。

默認情況下:percents:[ 1, 5, 25, 50, 75, 95, 99 ]

{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "percents" : [95, 99, 99.9] 
            }
        }
    }
}
  • keyed Response

keyed 默認爲 true,此時結果是以百分位爲鍵的對象;設爲 false 時,結果改爲由 key/value 對象組成的數組。

{
    "aggs": {
        "balance_outlier": {
            "percentiles": {
                "field": "balance",
                "keyed": false
            }
        }
    }
}
  • 腳本(可先對值做換算再聚合,例如除以 timeUnit 把毫秒轉成秒)
{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "script" : {
                    "lang": "painless",
                    "source": "doc['load_time'].value / params.timeUnit", 
                    "params" : {
                        "timeUnit" : 1000   
                    }
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "timeUnit" : 1000
                    }
                }
            }
        }
    }
}
  • 壓縮

通過指定 tdigest 的 compression 值,在內存佔用與精度之間取得平衡。TDigest 使用的節點數上限爲 20 * compression,compression 越大越精確,但佔用內存越多、速度越慢;默認值爲 100。

{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "tdigest": {
                  "compression" : 200 
                }
            }
        }
    }
}
  • 直方圖(HDR Histogram;number_of_significant_value_digits 爲分辨率的有效位數)
{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "percents" : [95, 99, 99.9],
                "hdr": { 
                  "number_of_significant_value_digits" : 3
                }
            }
        }
    }
}
  • 缺省值處理
{
    "aggs" : {
        "grade_percentiles" : {
            "percentiles" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

百分排名聚合分析 - percentile_ranks

  • 15以內的佔比,30以內的佔比
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "field" : "load_time", 
                "values" : [15, 30]
            }
        }
    }
}
  • keyed Response
{
    "aggs": {
        "balance_outlier": {
            "percentile_ranks": {
                "field": "balance",
                "values": [25000, 50000],
                "keyed": false
            }
        }
    }
}
  • 腳本
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "values" : [3, 5],
                "script" : {
                    "lang": "painless",
                    "source": "doc['load_time'].value / params.timeUnit", 
                    "params" : {
                        "timeUnit" : 1000   
                    }
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "values" : [3, 5],
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "timeUnit" : 1000
                    }
                }
            }
        }
    }
}
  • 直方圖
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "field" : "load_time",
                "values" : [15, 30],
                "hdr": { 
                  "number_of_significant_value_digits" : 3 
                }
            }
        }
    }
}
  • 缺失值處理
{
    "aggs" : {
        "grade_ranks" : {
            "percentile_ranks" : {
                "field" : "grade",
                "values" : [15, 30],
                "missing": 10 
            }
        }
    }
}

統計彙總分析 - stats

  • 返回所有的分析結果
{
    "aggs" : {
        "grades_stats" : { 
            "stats" : { 
                "field" : "grade" 
            }
        }
    }
}
  • 腳本
{
    "aggs" : {
        "grades_stats" : {
             "stats" : {
                 "script" : {
                     "lang": "painless",
                     "source": "doc['grade'].value"
                 }
             }
         }
    }
}
  • 文件腳本
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "field" : "grade"
                    }
                }
            }
        }
    }
}
  • 值腳本
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "field" : "grade",
                "script" : {
                    "lang": "painless",
                    "source": "_value * params.correction",
                    "params" : {
                        "correction" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值處理
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "field" : "grade",
                "missing": 0 
            }
        }
    }
}

求和聚合分析 - sum

{
    "aggs" : {
        "hat_prices" : { "sum" : { "field" : "price" } }
    }
}
  • 腳本
{
    "aggs" : {
        "hat_prices" : {
            "sum" : {
                "script" : {
                   "source": "doc.price.value"
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "hat_prices" : {
            "sum" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "field" : "price"
                    }
                }
            }
        }
    }
}
  • 值腳本
{
    "aggs" : {
        "square_hats" : {
            "sum" : {
                "field" : "price",
                "script" : {
                    "source": "_value * _value"
                }
            }
        }
    }
}

  • 缺失值處理
{
    "aggs" : {
        "hat_prices" : {
            "sum" : {
                "field" : "price",
                "missing": 100 
            }
        }
    }
}

熱點數據 - top_hits

  • 一個栗子

這個聚合分析方法返回每個桶中排在最前面的文檔(常規的搜索命中),你可以在**_source**字段中指定返回的字段。

{
    "aggs": {
        "top_tags": {
            "terms": {
                "field": "type",
                "size": 3
            },
            "aggs": {
                "top_sales_hits": {
                    "top_hits": {
                        "sort": [
                            {
                                "date": {
                                    "order": "desc"
                                }
                            }
                        ],
                        "_source": {
                            "includes": [ "date", "price" ]
                        },
                        "size" : 1
                    }
                }
            }
        }
    }
}

計數聚合分析 - value_count

  • 返回type字段值的個數(統計提取到的值的數量,不去重)
{
    "aggs" : {
        "types_count" : { "value_count" : { "field" : "type" } }
    }
}
  • 腳本
{
    "aggs" : {
        "type_count" : {
            "value_count" : {
                "script" : {
                    "source" : "doc['type'].value"
                }
            }
        }
    }
}
  • 文件腳本
{
    "aggs" : {
        "types_count" : {
            "value_count" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "field" : "type"
                    }
                }
            }
        }
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章