Elasticsearch聚合分析Java Client

本文基於Elasticsearch7.x

在這裏插入圖片描述

本文將上篇Elasticsearch聚合分析Rest API裏的實例轉化爲Java Client

Bucket Aggregation

Bucket Aggregation是一系列滿足特定條件的文檔的集合, 類似於SQL語句裏的分組功能.

(1) main方法

/**
 * Runs the bucket-aggregation examples against a local Elasticsearch node
 * (http://localhost:9200): indexes the sample data, then executes each
 * aggregation example in turn.
 *
 * @param args unused
 * @throws IOException if any request to Elasticsearch fails
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the client even when one of the examples throws,
    // so the underlying HTTP connections are never leaked.
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {

        bulkIndex(client);

        termAggs(client);

        rangeAggs(client);

        dateRangeAggs(client);

        histogramAggs(client);

        dateHistogramAggs(client);

        filterAggs(client);
    }
}

(2) 添加數據

新增電視機銷售記錄, 用於接下來的聚合分析.

/**
 * Indexes eight TV sales records into the {@code sales} index with a single bulk request.
 * The records are the data set used by the aggregation examples.
 *
 * @param client client used to execute the bulk request
 * @throws IOException if the request fails or any item in the bulk request is rejected
 */
private static void bulkIndex(RestHighLevelClient client) throws IOException {
    BulkRequest bulkRequest = new BulkRequest();
    // Block until the documents are visible to search; otherwise the aggregations that
    // run immediately after this call may execute before the index has refreshed.
    bulkRequest.setRefreshPolicy("wait_for");

    bulkRequest.add(new IndexRequest("sales").id("1")
            .source(XContentType.JSON, "price", 1000, "color", "紅色", "brand", "長虹", "sold_date", "2019-10-28"));
    bulkRequest.add(new IndexRequest("sales").id("2")
            .source(XContentType.JSON, "price", 2000, "color", "紅色", "brand", "長虹", "sold_date", "2019-11-05"));
    bulkRequest.add(new IndexRequest("sales").id("3")
            .source(XContentType.JSON, "price", 3000, "color", "綠色", "brand", "小米", "sold_date", "2019-05-18"));
    bulkRequest.add(new IndexRequest("sales").id("4")
            .source(XContentType.JSON, "price", 1500, "color", "藍色", "brand", "TCL", "sold_date", "2019-07-02"));
    bulkRequest.add(new IndexRequest("sales").id("5")
            .source(XContentType.JSON, "price", 1200, "color", "綠色", "brand", "TCL", "sold_date", "2019-08-19"));
    bulkRequest.add(new IndexRequest("sales").id("6")
            .source(XContentType.JSON, "price", 2000, "color", "紅色", "brand", "長虹", "sold_date", "2019-11-05"));
    bulkRequest.add(new IndexRequest("sales").id("7")
            .source(XContentType.JSON, "price", 8000, "color", "紅色", "brand", "三星", "sold_date", "2020-01-01"));
    bulkRequest.add(new IndexRequest("sales").id("8")
            .source(XContentType.JSON, "price", 2500, "color", "藍色", "brand", "小米", "sold_date", "2020-02-12"));

    // A bulk call can succeed as a whole while individual items fail; surface that
    // instead of silently continuing with an incomplete data set.
    org.elasticsearch.action.bulk.BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
    if (bulkResponse.hasFailures()) {
        throw new IOException("Bulk indexing into 'sales' failed: " + bulkResponse.buildFailureMessage());
    }
}

(3) terms

按某個字段進行分組, 比如按品牌進行分組.

/**
 * Runs a {@code terms} aggregation named {@code term_aggs} on the {@code brand.keyword}
 * field of the {@code sales} index and prints each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void termAggs(RestHighLevelClient client) throws IOException {
    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(AggregationBuilders.terms("term_aggs").field("brand.keyword"));
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Terms byBrand = response.getAggregations().get("term_aggs");
    for (Terms.Bucket brandBucket : byBrand.getBuckets()) {
        String key = brandBucket.getKeyAsString();
        long docCount = brandBucket.getDocCount();
        System.out.println("key:" + key + "\ndoc_count:" + docCount);
    }
}

(4) range

按字段的值範圍進行分組, 比如按電視價格範圍分組. range只能作用於數值類型.

/**
 * Runs a {@code range} aggregation named {@code price_range_aggs} on the {@code price}
 * field with three ranges (up to 1000, 1000 to 3000, from 3000) and prints each bucket's
 * key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void rangeAggs(RestHighLevelClient client) throws IOException {
    RangeAggregationBuilder priceRanges = AggregationBuilders.range("price_range_aggs")
            .field("price")
            .addUnboundedTo(1000)
            .addRange(1000, 3000)
            .addUnboundedFrom(3000);

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(priceRanges);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Range byPrice = response.getAggregations().get("price_range_aggs");
    for (Range.Bucket priceBucket : byPrice.getBuckets()) {
        String key = priceBucket.getKeyAsString();
        long docCount = priceBucket.getDocCount();
        System.out.println("key:" + key + "\ndoc_count:" + docCount);
    }
}

(5) date_range

按字段的值範圍進行分組, 比如按sold_date範圍分組. date_range只能作用於date類型.

/**
 * Runs a {@code date_range} aggregation named {@code date_range_aggs} on the
 * {@code sold_date} field with three keyed ranges ("start", "middle", "end") and prints
 * each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void dateRangeAggs(RestHighLevelClient client) throws IOException {
    // NOTE(review): judging by the builder method names, "start" is registered as a range
    // with no upper bound and "end" as a range with no lower bound, so both would overlap
    // "middle". Confirm whether the labels/bounds are intended before relying on the output.
    DateRangeAggregationBuilder soldDateRanges = AggregationBuilders.dateRange("date_range_aggs")
            .field("sold_date")
            .addUnboundedFrom("start", "2019-10-31")
            .addRange("middle", "2019-10-31", "2020-01-01")
            .addUnboundedTo("end", "now");

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(soldDateRanges);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Range bySoldDate = response.getAggregations().get("date_range_aggs");
    for (Range.Bucket dateBucket : bySoldDate.getBuckets()) {
        String key = dateBucket.getKeyAsString();
        long docCount = dateBucket.getDocCount();
        System.out.println("key:" + key + "\ndoc_count:" + docCount);
    }
}

(6) histogram

構建一個直方圖, 如按照價格區間分組. histogram只能作用於數值類型.

/**
 * Runs a {@code histogram} aggregation named {@code histogram_aggs} on the {@code price}
 * field with an interval of 2000 and prints each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void histogramAggs(RestHighLevelClient client) throws IOException {
    HistogramAggregationBuilder priceHistogram = AggregationBuilders.histogram("histogram_aggs")
            .field("price")
            .interval(2000);

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(priceHistogram);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Histogram byPrice = response.getAggregations().get("histogram_aggs");
    for (Histogram.Bucket priceBucket : byPrice.getBuckets()) {
        String key = priceBucket.getKeyAsString();
        long docCount = priceBucket.getDocCount();
        System.out.println("key:" + key + "\ndoc_count:" + docCount);
    }
}

(7) date_histogram

構建一個直方圖, 如按照sold_date區間分組. date_histogram只能作用於date類型.

/**
 * Runs a {@code date_histogram} aggregation named {@code date_histogram_aggs} on the
 * {@code sold_date} field using a monthly calendar interval and prints each bucket's key
 * and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void dateHistogramAggs(RestHighLevelClient client) throws IOException {
    // minDocCount(0) together with the extended bounds asks for buckets across
    // 2019-05-01..2020-02-01 even for months without any document.
    DateHistogramAggregationBuilder monthlySales = AggregationBuilders.dateHistogram("date_histogram_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd")
            .minDocCount(0)
            .extendedBounds(new ExtendedBounds("2019-05-01", "2020-02-01"));

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(monthlySales);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Histogram byMonth = response.getAggregations().get("date_histogram_aggs");
    for (Histogram.Bucket monthBucket : byMonth.getBuckets()) {
        String key = monthBucket.getKeyAsString();
        long docCount = monthBucket.getDocCount();
        System.out.println("key:" + key + "\ndoc_count:" + docCount);
    }
}

(8) filter

過濾分組, 將滿足條件的數據分爲一組. 比如分析最近6個月電視銷售的平均價格.

/**
 * Runs a {@code filter} aggregation named {@code filter_aggs} that keeps documents whose
 * {@code sold_date} is at or after {@code now-6M}, with an {@code avg} sub-aggregation on
 * {@code price}; prints the filtered document count and the average price.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void filterAggs(RestHighLevelClient client) throws IOException {
    // Outer bucket: a single filter on sold_date; inner metric: average price.
    FilterAggregationBuilder recentSales = AggregationBuilders.filter(
            "filter_aggs", new RangeQueryBuilder("sold_date").gte("now-6M"));
    recentSales.subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(recentSales);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Filter filtered = response.getAggregations().get("filter_aggs");
    Avg averagePrice = filtered.getAggregations().get("avg_price");
    long docCount = filtered.getDocCount();
    System.out.println("doc_count: " + docCount + "\navg_price: " + averagePrice.getValue());
}

Metric Aggregation

Metric Aggregation是一系列數學運算, 可以對文檔字段進行統計分析, 類似於SQL語句分組後的統計功能.

(1) main方法

/**
 * Runs the metric-aggregation and nested (bucket + metric) examples against a local
 * Elasticsearch node (http://localhost:9200).
 *
 * @param args unused
 * @throws IOException if any request to Elasticsearch fails
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the client even when one of the examples throws,
    // so the underlying HTTP connections are never leaked.
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {

        baseMetricAggs(client);

        cadinalityAggs(client);

        statAggs(client);

        topHitsAggs(client);

        percentilesAggs(client);

        percentilesRanksAggs(client);

        singleNestAggs(client);

        multiNestAggs(client);
    }
}

(2) count/min/max/sum/avg

從上文的Bucket Aggregation實例中我們知道, 當進行Bucket Aggregation時, 默認會生成一個doc_count, 這個就是Count Aggregation.

min/max/sum/avg實例:

/**
 * Requests {@code min}, {@code max}, {@code sum} and {@code avg} aggregations on the
 * {@code price} field of the {@code sales} index in one search and prints the four values.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void baseMetricAggs(RestHighLevelClient client) throws IOException {
    // size(0): no hits are requested, only the aggregation results.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(AggregationBuilders.min("min_price").field("price"))
            .aggregation(AggregationBuilders.max("max_price").field("price"))
            .aggregation(AggregationBuilders.sum("total_sales").field("price"))
            .aggregation(AggregationBuilders.avg("avg_price").field("price"));
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Max highest = response.getAggregations().get("max_price");
    Min lowest = response.getAggregations().get("min_price");
    Avg average = response.getAggregations().get("avg_price");
    Sum total = response.getAggregations().get("total_sales");

    System.out.println("max_price: " + highest.getValue());
    System.out.println("min_price: " + lowest.getValue());
    System.out.println("avg_price: " + average.getValue());
    System.out.println("total_sales: " + total.getValue());
}

(3) cardinality

類似於SQL裏的distinct count. 比如統計售出的電視品牌數.

/**
 * Runs a {@code cardinality} aggregation named {@code cardinality_aggs} on the
 * {@code brand.keyword} field of the {@code sales} index and prints its value.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void cadinalityAggs(RestHighLevelClient client) throws IOException {
    CardinalityAggregationBuilder distinctBrands =
            AggregationBuilders.cardinality("cardinality_aggs").field("brand.keyword");

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(distinctBrands);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Cardinality brandCount = response.getAggregations().get("cardinality_aggs");
    System.out.println("cardinality_aggs: " + brandCount.getValue());
}

(4) stats

統計count, min, max, sum, avg.

/**
 * Runs a {@code stats} aggregation named {@code stat_price_aggs} on the {@code price}
 * field of the {@code sales} index and prints count, max, min, avg and sum.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void statAggs(RestHighLevelClient client) throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // size(0): no hits are requested, only the aggregation result.
    searchSourceBuilder.size(0);
    //aggs
    StatsAggregationBuilder statsAggregationBuilder = AggregationBuilders.stats("stat_price_aggs").field("price");
    searchSourceBuilder.aggregation(statsAggregationBuilder);
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    Stats statPriceAggs = searchResponse.getAggregations().get("stat_price_aggs");
    // The stats aggregation also reports the document count; print it alongside the
    // other four values so the example shows the complete result.
    System.out.println("count: " + statPriceAggs.getCount());
    System.out.println("max_price: " + statPriceAggs.getMax());
    System.out.println("min_price: " + statPriceAggs.getMin());
    System.out.println("avg_price: " + statPriceAggs.getAvg());
    System.out.println("total_sales: " + statPriceAggs.getSum());
}

(5) top_hits

返回排在前面的結果, 與sort聯用. 比如統計前三銷售價格.

/**
 * Runs a {@code top_hits} aggregation named {@code top_hits_aggs} that returns three
 * documents sorted by {@code price} descending, and prints each hit's source JSON.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void topHitsAggs(RestHighLevelClient client) throws IOException {
    TopHitsAggregationBuilder mostExpensive = AggregationBuilders.topHits("top_hits_aggs")
            .size(3)
            .sort("price", SortOrder.DESC);

    // size(0) on the search itself: the hits come from the aggregation, not the query.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(mostExpensive);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    TopHits topHits = response.getAggregations().get("top_hits_aggs");
    for (SearchHit sale : topHits.getHits().getHits()) {
        System.out.println(sale.getSourceAsString());
    }
}

(6) percentiles

percentiles表示觀察值在某個百分比的最大值. 比如統計50%, 90% 和 99%的電視的最大價格.

/**
 * Runs a {@code percentiles} aggregation named {@code percentile_aggs} on the
 * {@code price} field for the 50th, 90th and 99th percentiles and prints each
 * percent/value pair.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void percentilesAggs(RestHighLevelClient client) throws IOException {
    PercentilesAggregationBuilder pricePercentiles = AggregationBuilders.percentiles("percentile_aggs")
            .field("price")
            .percentiles(50, 90, 99);

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(pricePercentiles);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Percentiles result = response.getAggregations().get("percentile_aggs");
    for (Percentile entry : result) {
        double percent = entry.getPercent();
        double price = entry.getValue();
        System.out.println(percent + ": " + price);
    }
}

(7) percentile_ranks

percentile_ranks表示觀察值低於一定值的百分比. 比如統計價格在2000以內, 5000以內, 10000以內的電視所佔比例.

/**
 * Runs a {@code percentile_ranks} aggregation named {@code percentile_ranks} on the
 * {@code price} field for the values 2000, 5000 and 10000 and prints each
 * value/percent pair.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void percentilesRanksAggs(RestHighLevelClient client) throws IOException {
    double[] priceThresholds = {2000, 5000, 10000};
    PercentileRanksAggregationBuilder priceRanks =
            AggregationBuilders.percentileRanks("percentile_ranks", priceThresholds).field("price");

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(priceRanks);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    PercentileRanks result = response.getAggregations().get("percentile_ranks");
    for (Percentile entry : result) {
        double price = entry.getValue();
        double percent = entry.getPercent();
        System.out.println(price + ": " + percent);
    }
}

Bucket + Metric Aggregation

Bucket 聚合分析允許通過添加子聚合分析來進一步分析, 子聚合分析可以是Bucket, 也可以是Metric.

(1) 一層嵌套, 如按品牌分組, 統計價格信息

/**
 * Groups the {@code sales} index by {@code brand.keyword} and computes a {@code stats}
 * sub-aggregation on {@code price} per brand; prints each brand key followed by its
 * count, max, min, avg and sum.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void singleNestAggs(RestHighLevelClient client) throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // size(0): no hits are requested, only the aggregation result.
    searchSourceBuilder.size(0);
    //aggs
    TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    //subAggs
    StatsAggregationBuilder statsAggregationBuilder = AggregationBuilders.stats("stats_price").field("price");
    termsAggregationBuilder.subAggregation(statsAggregationBuilder);
    searchSourceBuilder.aggregation(termsAggregationBuilder);
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    Terms termsAggs = searchResponse.getAggregations().get("group_by_brand");
    List<? extends Terms.Bucket> buckets = termsAggs.getBuckets();
    for (Terms.Bucket bucket : buckets) {
        Stats statsPriceAggs = bucket.getAggregations().get("stats_price");
        // Print the bucket key first; without it the statistics of different brands
        // are indistinguishable in the output.
        System.out.println("key: " + bucket.getKeyAsString());
        System.out.println("count: " + statsPriceAggs.getCount());
        System.out.println("max: " + statsPriceAggs.getMax());
        System.out.println("min: " + statsPriceAggs.getMin());
        System.out.println("avg: " + statsPriceAggs.getAvg());
        System.out.println("total: " + statsPriceAggs.getSum());
    }
}

(2) 多層嵌套, 如先按品牌分組, 然後按顏色分組, 統計價格信息

/**
 * Groups the {@code sales} index by {@code brand.keyword}, then by {@code color.keyword},
 * and computes a {@code stats} sub-aggregation on {@code price} for each brand/color
 * pair; prints the brand and color keys followed by count, max, min, avg and sum.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void multiNestAggs(RestHighLevelClient client) throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // size(0): no hits are requested, only the aggregation result.
    searchSourceBuilder.size(0);
    //aggs
    TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    //subAggs1
    TermsAggregationBuilder termsAggregationBuilder2 = AggregationBuilders.terms("group_by_color").field("color.keyword");
    //subAggs2
    StatsAggregationBuilder statsAggregationBuilder = AggregationBuilders.stats("stats_price").field("price");
    termsAggregationBuilder2.subAggregation(statsAggregationBuilder);
    termsAggregationBuilder.subAggregation(termsAggregationBuilder2);
    searchSourceBuilder.aggregation(termsAggregationBuilder);
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    Terms termsAggs = searchResponse.getAggregations().get("group_by_brand");
    List<? extends Terms.Bucket> buckets = termsAggs.getBuckets();
    for (Terms.Bucket bucket : buckets) {
        Terms groupByColorAggs = bucket.getAggregations().get("group_by_color");
        List<? extends Terms.Bucket> colorAggsBuckets = groupByColorAggs.getBuckets();
        for (Terms.Bucket colorAggsBucket : colorAggsBuckets) {
            Stats statsPriceAggs = colorAggsBucket.getAggregations().get("stats_price");
            // Print both grouping keys first; without them the statistics of different
            // brand/color combinations are indistinguishable in the output.
            System.out.println("brand: " + bucket.getKeyAsString());
            System.out.println("color: " + colorAggsBucket.getKeyAsString());
            System.out.println("count: " + statsPriceAggs.getCount());
            System.out.println("max: " + statsPriceAggs.getMax());
            System.out.println("min: " + statsPriceAggs.getMin());
            System.out.println("avg: " + statsPriceAggs.getAvg());
            System.out.println("total: " + statsPriceAggs.getSum());
        }
    }
}

Pipeline Aggregation

對聚合分析的結果再次進行聚合分析.

(1) main方法

/**
 * Runs the pipeline-aggregation examples against a local Elasticsearch node
 * (http://localhost:9200).
 *
 * @param args unused
 * @throws IOException if any request to Elasticsearch fails
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the client even when one of the examples throws,
    // so the underlying HTTP connections are never leaked.
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {

        minBucketAggs(client);

        statBucketAggs(client);

        percentilesBucketAggs(client);

        derivativeAggs(client);

        cumulativeSumAggs(client);

        moveFnAggs(client);
    }
}

(2) min_bucket

平均價格最低的品牌

/**
 * Groups the {@code sales} index by {@code brand.keyword} with an {@code avg} of
 * {@code price} per brand, then applies a sibling {@code min_bucket} pipeline over
 * {@code group_by_brand>avg_price}. Prints every brand bucket, a blank line, and the
 * pipeline's value and keys.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void minBucketAggs(RestHighLevelClient client) throws IOException {
    // Bucket aggregation with its per-bucket metric.
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    // Sibling pipeline: sits next to group_by_brand and reads its avg_price values.
    MinBucketPipelineAggregationBuilder cheapestBrand =
            PipelineAggregatorBuilders.minBucket("min_price_by_brand", "group_by_brand>avg_price");

    // size(0): no hits are requested, only the aggregation results.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(byBrand)
            .aggregation(cheapestBrand);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brandBucket : brands.getBuckets()) {
        Avg averagePrice = brandBucket.getAggregations().get("avg_price");
        System.out.println("key:" + brandBucket.getKeyAsString() + "\ndoc_count:" + brandBucket.getDocCount() + "\navg_price:" + averagePrice.getValue());
    }

    System.out.println();
    BucketMetricValue minimum = response.getAggregations().get("min_price_by_brand");
    String minimumKeys = Arrays.toString(minimum.keys());
    System.out.println("value: " + minimum.getValueAsString() + "\nkeys:" + minimumKeys);
}

(2) stats_bucket

所有品牌電視的平均價格的統計分析.

/**
 * Groups the {@code sales} index by {@code brand.keyword} with an {@code avg} of
 * {@code price} per brand, then applies a sibling {@code stats_bucket} pipeline over
 * {@code group_by_brand>avg_price}. Prints every brand bucket, a blank line, and the
 * pipeline's count, max, min, avg and sum.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void statBucketAggs(RestHighLevelClient client) throws IOException {
    // Bucket aggregation with its per-bucket metric.
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    // Sibling pipeline: sits next to group_by_brand and reads its avg_price values.
    StatsBucketPipelineAggregationBuilder brandStats =
            PipelineAggregatorBuilders.statsBucket("stats_price_by_brand", "group_by_brand>avg_price");

    // size(0): no hits are requested, only the aggregation results.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(byBrand)
            .aggregation(brandStats);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brandBucket : brands.getBuckets()) {
        Avg averagePrice = brandBucket.getAggregations().get("avg_price");
        System.out.println("key:" + brandBucket.getKeyAsString() + "\ndoc_count:" + brandBucket.getDocCount() + "\navg_price:" + averagePrice.getValue());
    }

    System.out.println();
    StatsBucket summary = response.getAggregations().get("stats_price_by_brand");
    System.out.println("count: " + summary.getCount());
    System.out.println("max: " + summary.getMax());
    System.out.println("min: " + summary.getMin());
    System.out.println("avg: " + summary.getAvg());
    System.out.println("total: " + summary.getSum());
}

(3) percentiles_bucket

所有品牌電視的平均價格的百分比統計

/**
 * Groups the {@code sales} index by {@code brand.keyword} with an {@code avg} of
 * {@code price} per brand, then applies a sibling {@code percentiles_bucket} pipeline
 * (50, 90, 99) over {@code group_by_brand>avg_price}. Prints every brand bucket, a blank
 * line, and each percent/value pair of the pipeline.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void percentilesBucketAggs(RestHighLevelClient client) throws IOException {
    // Bucket aggregation with its per-bucket metric.
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    // Sibling pipeline: sits next to group_by_brand and reads its avg_price values.
    double[] requestedPercents = {50, 90, 99};
    PercentilesBucketPipelineAggregationBuilder brandPercentiles = PipelineAggregatorBuilders
            .percentilesBucket("percentiles_price_by_brand", "group_by_brand>avg_price")
            .setPercents(requestedPercents);

    // size(0): no hits are requested, only the aggregation results.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(byBrand)
            .aggregation(brandPercentiles);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brandBucket : brands.getBuckets()) {
        Avg averagePrice = brandBucket.getAggregations().get("avg_price");
        System.out.println("key:" + brandBucket.getKeyAsString() + "\ndoc_count:" + brandBucket.getDocCount() + "\navg_price:" + averagePrice.getValue());
    }

    System.out.println();
    ParsedPercentilesBucket percentileResult = response.getAggregations().get("percentiles_price_by_brand");
    for (Percentile entry : percentileResult) {
        System.out.println(entry.getPercent() + ": " + entry.getValue());
    }
}

(4) derivative

對按照sold_date分組的平均價格求導

/**
 * Builds a monthly {@code date_histogram} on {@code sold_date} with an {@code avg} of
 * {@code price} per month, and a parent {@code derivative} pipeline over that average.
 * Prints every month bucket and, where present, the derivative of the average price.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void derivativeAggs(RestHighLevelClient client) throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // size(0): no hits are requested, only the aggregation result.
    searchSourceBuilder.size(0);
    //aggs
    DateHistogramAggregationBuilder dateHistogramAggregationBuilder = AggregationBuilders.dateHistogram("sold_date_aggs").field("sold_date").calendarInterval(DateHistogramInterval.MONTH).format("yyyy-MM-dd");
    //subAggs
    AvgAggregationBuilder avgAggregationBuilder = AggregationBuilders.avg("avg_price_aggs").field("price");
    dateHistogramAggregationBuilder.subAggregation(avgAggregationBuilder);
    //parentAggs
    DerivativePipelineAggregationBuilder derivativePipelineAggregationBuilder = PipelineAggregatorBuilders.derivative("derivative_avg_price_aggs", "avg_price_aggs");
    dateHistogramAggregationBuilder.subAggregation(derivativePipelineAggregationBuilder);

    searchSourceBuilder.aggregation(dateHistogramAggregationBuilder);
    searchRequest.source(searchSourceBuilder);

    //result
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    Histogram histogramAggs = searchResponse.getAggregations().get("sold_date_aggs");
    List<? extends Histogram.Bucket> buckets = histogramAggs.getBuckets();
    for (Histogram.Bucket bucket : buckets) {
        Avg avgPriceAggs = bucket.getAggregations().get("avg_price_aggs");
        System.out.println("key:" + bucket.getKeyAsString() + "\ndoc_count:" + bucket.getDocCount() + "\navg_price:" + avgPriceAggs.getValue());
        // Buckets without a computable derivative (e.g. the first one) have no entry.
        Derivative derivativeAvgPriceAggs = bucket.getAggregations().get("derivative_avg_price_aggs");
        if (derivativeAvgPriceAggs != null) {
            // Print value(), the derivative itself. normalizedValue() is only meaningful
            // when a unit is configured on the derivative builder, which is not done here.
            System.out.println("derivative_avg_price_aggs: " + derivativeAvgPriceAggs.value());
        }
    }
}

(5) cumulative_sum

對按照sold_date分組的平均價格累計求和.

/**
 * Builds a monthly {@code date_histogram} on {@code sold_date} with an {@code avg} of
 * {@code price} per month, and a parent {@code cumulative_sum} pipeline over that average.
 * Prints every month bucket and, where present, the cumulative sum.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void cumulativeSumAggs(RestHighLevelClient client) throws IOException {
    DateHistogramAggregationBuilder byMonth = AggregationBuilders.dateHistogram("sold_date_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd");
    // Per-month metric first, then the parent pipeline that reads it.
    byMonth.subAggregation(AggregationBuilders.avg("avg_price_aggs").field("price"));
    byMonth.subAggregation(
            PipelineAggregatorBuilders.cumulativeSum("cumulative_sum_avg_price_aggs", "avg_price_aggs"));

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byMonth);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Histogram months = response.getAggregations().get("sold_date_aggs");
    for (Histogram.Bucket monthBucket : months.getBuckets()) {
        Avg averagePrice = monthBucket.getAggregations().get("avg_price_aggs");
        System.out.println("key:" + monthBucket.getKeyAsString() + "\ndoc_count:" + monthBucket.getDocCount() + "\navg_price:" + averagePrice.getValue());

        ParsedSimpleValue runningTotal = monthBucket.getAggregations().get("cumulative_sum_avg_price_aggs");
        if (runningTotal == null) {
            continue;
        }
        System.out.println("cumulative_sum_avg_price_aggs: " + runningTotal.getValueAsString());
    }
}

(6) moving_fn

對按照sold_date分組的平均價格, 按時間窗口求最小平均價格

/**
 * Builds a monthly {@code date_histogram} on {@code sold_date} with an {@code avg} of
 * {@code price} per month, and a parent {@code moving_fn} pipeline that evaluates the
 * script {@code MovingFunctions.min(values)} over that average with a window argument
 * of 10. Prints every month bucket and, where present, the moving-function value.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void moveFnAggs(RestHighLevelClient client) throws IOException {
    DateHistogramAggregationBuilder byMonth = AggregationBuilders.dateHistogram("sold_date_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd");
    // Per-month metric first, then the parent pipeline that reads it.
    byMonth.subAggregation(AggregationBuilders.avg("avg_price_aggs").field("price"));
    byMonth.subAggregation(PipelineAggregatorBuilders.movingFunction(
            "moving_fn_avg_price_aggs", new Script("MovingFunctions.min(values)"), "avg_price_aggs", 10));

    // size(0): no hits are requested, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byMonth);
    SearchRequest request = new SearchRequest("sales").source(source);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    Histogram months = response.getAggregations().get("sold_date_aggs");
    for (Histogram.Bucket monthBucket : months.getBuckets()) {
        Avg averagePrice = monthBucket.getAggregations().get("avg_price_aggs");
        System.out.println("key:" + monthBucket.getKeyAsString() + "\ndoc_count:" + monthBucket.getDocCount() + "\navg_price:" + averagePrice.getValue());

        ParsedSimpleValue windowMinimum = monthBucket.getAggregations().get("moving_fn_avg_price_aggs");
        if (windowMinimum == null) {
            continue;
        }
        System.out.println("moving_fn_avg_price_aggs: " + windowMinimum.getValueAsString());
    }
}

Matrix Aggregation

支持對多個字段進行統計分析, 並爲每個字段提供一個結果矩陣.

/**
 * Runs a {@code matrix_stats} aggregation named {@code statistics} over the {@code price}
 * field of the {@code sales} index and prints count, mean, variance, skewness, kurtosis,
 * and the covariance/correlation of {@code price} with itself.
 *
 * <p>Renamed from {@code moveFnAggs}: that name was a copy-paste leftover that duplicated
 * the signature of the moving-function example and did not describe this method.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void matrixStatsAggs(RestHighLevelClient client) throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // size(0): no hits are requested, only the aggregation result.
    searchSourceBuilder.size(0);
    //aggs
    List<String> fields = new ArrayList<>();
    fields.add("price");
    MatrixStatsAggregationBuilder matrixStatsAggregationBuilder = MatrixStatsAggregationBuilders.matrixStats("statistics").fields(fields);
    searchSourceBuilder.aggregation(matrixStatsAggregationBuilder);
    searchRequest.source(searchSourceBuilder);

    //result
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    ParsedMatrixStats parsedMatrixStats = searchResponse.getAggregations().get("statistics");
    System.out.println("count: " + parsedMatrixStats.getFieldCount("price"));
    System.out.println("mean: " + parsedMatrixStats.getMean("price"));
    System.out.println("variance: " + parsedMatrixStats.getVariance("price"));
    System.out.println("skewness: " + parsedMatrixStats.getSkewness("price"));
    System.out.println("kurtosis: " + parsedMatrixStats.getKurtosis("price"));
    System.out.println("covariance: " + parsedMatrixStats.getCovariance("price","price"));
    System.out.println("correlation: " + parsedMatrixStats.getCorrelation("price", "price"));
}

聚合的作用範圍及排序

Elasticsearch聚合分析的默認作用範圍是query的查詢結果集, 同時Elasticsearch還支持以下方式改變聚合的作用範圍.

  • filter
  • post_filter
  • global

(1) main方法

/**
 * Entry point for the aggregation scope and ordering demos: opens a client against
 * localhost:9200 over http and runs each demo in turn.
 *
 * @param args unused
 * @throws IOException propagated from any demo or from closing the client
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the client even when one of the demos throws;
    // the original only reached client.close() on the success path.
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {

        query(client);

        filter(client);

        postFilter(client);

        global(client);

        orderAggs(client);

        orderSubAggs(client);
    }
}

(2) query

先使用query進行過濾, 然後再進行聚合操作.

/**
 * Restricts the search to documents with price >= 2000 via a range query, groups the
 * matching documents by "brand.keyword" and prints each bucket's key and doc count.
 *
 * @param client client used to execute the search
 * @throws IOException propagated from {@code client.search}
 */
private static void query(RestHighLevelClient client) throws IOException {
    //query + aggs; size 0 because only the aggregation is read
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .query(QueryBuilders.rangeQuery("price").gte(2000))
            .aggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"));

    SearchRequest request = new SearchRequest("sales").source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    //result
    Terms byBrand = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brandBucket : byBrand.getBuckets()) {
        System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount());
    }
}

(3) filter

聚合內的filter只對當前的子聚合語句生效.

/**
 * Sends two sibling aggregations in one request: a filter aggregation "price_filter"
 * (price >= 2000) containing a terms sub-aggregation on "brand.keyword", and an
 * unfiltered terms aggregation "group_by_all_brand" on the same field. Prints the
 * unfiltered buckets first, then the buckets nested under the filter.
 *
 * @param client client used to execute the search
 * @throws IOException propagated from {@code client.search}
 */
private static void filter(RestHighLevelClient client) throws IOException {
    //aggs1: filter aggregation with its terms sub-aggregation
    FilterAggregationBuilder priceFilter = AggregationBuilders
            .filter("price_filter", QueryBuilders.rangeQuery("price").gte(2000))
            .subAggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"));

    //aggs2: same grouping without the filter
    TermsAggregationBuilder allBrands = AggregationBuilders.terms("group_by_all_brand").field("brand.keyword");

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0);
    source.aggregation(priceFilter);
    source.aggregation(allBrands);

    SearchRequest request = new SearchRequest("sales");
    request.source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    //result: unfiltered grouping
    Terms unfiltered = response.getAggregations().get("group_by_all_brand");
    unfiltered.getBuckets().forEach(brandBucket ->
            System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount()));

    //result: grouping nested under the filter aggregation
    Filter filtered = response.getAggregations().get("price_filter");
    Terms filteredByBrand = filtered.getAggregations().get("group_by_brand");
    filteredByBrand.getBuckets().forEach(brandBucket ->
            System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount()));
}

(4) post_filter

post_filter在聚合計算完成之後才對搜索命中的文檔(hits)進行過濾, 不影響聚合結果; 由於過濾效果只體現在返回的hits中, size不要設置爲0.

/**
 * Groups the "sales" index by "brand.keyword" and attaches a post filter on
 * brand.keyword = "小米". Prints every terms bucket, then the source of each
 * returned hit. No size is set on the request, so hits are returned.
 *
 * @param client client used to execute the search
 * @throws IOException propagated from {@code client.search}
 */
private static void postFilter(RestHighLevelClient client) throws IOException {
    //aggs + post_filter
    SearchSourceBuilder source = new SearchSourceBuilder()
            .aggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"))
            .postFilter(QueryBuilders.termQuery("brand.keyword", "小米"));

    SearchRequest request = new SearchRequest("sales").source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    //result: aggregation buckets
    Terms byBrand = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brandBucket : byBrand.getBuckets()) {
        System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount());
    }

    //result: hits returned with the response
    for (SearchHit matched : response.getHits().getHits()) {
        System.out.println(matched.getSourceAsString());
    }
}

(5) global

global無視query過濾, 對全部文檔進行統計.

/**
 * Combines a range query (price >= 2000) with two aggregations: a terms aggregation
 * on "brand.keyword" and a global aggregation "all_price_aggs" holding an average of
 * "price". Prints the terms buckets, then the global doc count and average price.
 *
 * @param client client used to execute the search
 * @throws IOException propagated from {@code client.search}
 */
private static void global(RestHighLevelClient client) throws IOException {
    //aggs: terms grouping plus a global aggregation wrapping the average
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    GlobalAggregationBuilder allDocs = AggregationBuilders.global("all_price_aggs")
            .subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    //query + aggs, size 0
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .query(QueryBuilders.rangeQuery("price").gte(2000))
            .aggregation(byBrand)
            .aggregation(allDocs);

    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    //result: terms buckets
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    List<? extends Terms.Bucket> brandBuckets = brandTerms.getBuckets();
    for (int i = 0; i < brandBuckets.size(); i++) {
        Terms.Bucket brandBucket = brandBuckets.get(i);
        System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount());
    }

    //result: global aggregation and its nested average
    Global globalAggs = response.getAggregations().get("all_price_aggs");
    Avg globalAvgPrice = globalAggs.getAggregations().get("avg_price");
    System.out.println("\ndoc_count: " + globalAggs.getDocCount() + "\navg_price: " + globalAvgPrice.getValue());
}

排序

通過order字段來指定排序, 默認情況下根據doc_count降序排列.

(1) 根據doc_count升序, key降序來排序

/**
 * Groups the "sales" index by "brand.keyword" with two ordering criteria, in this
 * sequence: doc count ascending, then key descending. Prints each bucket's key and
 * doc count.
 *
 * @param client client used to execute the search
 * @throws IOException propagated from {@code client.search}
 */
private static void orderAggs(RestHighLevelClient client) throws IOException {
    //aggs: ordering criteria are applied in list order
    List<BucketOrder> sortCriteria = new ArrayList<>();
    sortCriteria.add(BucketOrder.count(true));
    sortCriteria.add(BucketOrder.key(false));
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand")
            .field("brand.keyword")
            .order(sortCriteria);

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byBrand);
    SearchRequest request = new SearchRequest("sales");
    request.source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    //result
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    brandTerms.getBuckets().forEach(brandBucket ->
            System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount()));
}

(2) 基於子聚合的值排序

/**
 * Groups the "sales" index by "brand.keyword", computes the average "price" per
 * bucket in a sub-aggregation named "avg_price", and orders the buckets by that
 * sub-aggregation with the ascending flag set to false. Prints key, doc count and
 * average price for each bucket.
 *
 * @param client client used to execute the search
 * @throws IOException propagated from {@code client.search}
 */
private static void orderSubAggs(RestHighLevelClient client) throws IOException {
    //aggs: the order refers to the sub-aggregation by its name
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand")
            .field("brand.keyword")
            .order(BucketOrder.aggregation("avg_price", false))
            .subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0);
    source.aggregation(byBrand);

    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    //result
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brandBucket : brandTerms.getBuckets()) {
        Avg bucketAvgPrice = brandBucket.getAggregations().get("avg_price");
        System.out.println("key: " + brandBucket.getKeyAsString() + "\ndoc_count: " + brandBucket.getDocCount() + "\navg_price: " + bucketAvgPrice.getValue());
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章