本文基於Elasticsearch7.x
本文將上篇Elasticsearch聚合分析Rest API裏的實例轉化爲Java Client
Bucket Aggregation
Bucket Aggregation是一系列滿足特定條件的文檔的集合, 類似於SQL語句裏的分組功能.
(1) main方法
/**
 * Entry point for the bucket-aggregation examples: indexes the sample sales
 * documents and then runs every bucket aggregation against the node at
 * localhost:9200.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if a request fails or the client cannot be closed
 */
public static void main(String[] args) throws IOException {
    // try-with-resources guarantees the client is closed even when one of
    // the examples throws; previously a failure skipped client.close().
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {
        bulkIndex(client);
        termAggs(client);
        rangeAggs(client);
        dateRangeAggs(client);
        histogramAggs(client);
        dateHistogramAggs(client);
        filterAggs(client);
    }
}
(2) 添加數據
新增電視機銷售記錄, 用於接下來的聚合分析.
/**
 * Indexes eight sample TV sales documents (ids "1" to "8") into the "sales"
 * index using a single bulk request.
 *
 * @param client client used to send the bulk request
 * @throws IOException if the bulk request fails
 */
private static void bulkIndex(RestHighLevelClient client) throws IOException {
    BulkRequest sales = new BulkRequest();
    sales.add(saleRecord("1", 1000, "紅色", "長虹", "2019-10-28"));
    sales.add(saleRecord("2", 2000, "紅色", "長虹", "2019-11-05"));
    sales.add(saleRecord("3", 3000, "綠色", "小米", "2019-05-18"));
    sales.add(saleRecord("4", 1500, "藍色", "TCL", "2019-07-02"));
    sales.add(saleRecord("5", 1200, "綠色", "TCL", "2019-08-19"));
    sales.add(saleRecord("6", 2000, "紅色", "長虹", "2019-11-05"));
    sales.add(saleRecord("7", 8000, "紅色", "三星", "2020-01-01"));
    sales.add(saleRecord("8", 2500, "藍色", "小米", "2020-02-12"));
    client.bulk(sales, RequestOptions.DEFAULT);
}

/** Builds the index request for one sales document in the "sales" index. */
private static IndexRequest saleRecord(String id, int price, String color, String brand, String soldDate) {
    return new IndexRequest("sales").id(id)
            .source(XContentType.JSON, "price", price, "color", color, "brand", brand, "sold_date", soldDate);
}
(3) terms
按某個字段進行分組, 比如按品牌進行分組.
/**
 * Runs a terms aggregation named "term_aggs" on the "brand.keyword" field of
 * the "sales" index and prints each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void termAggs(RestHighLevelClient client) throws IOException {
    // size(0): request no search hits, only the aggregation result.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(AggregationBuilders.terms("term_aggs").field("brand.keyword"));
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms byBrand = response.getAggregations().get("term_aggs");
    byBrand.getBuckets().forEach(b ->
            System.out.println("key:" + b.getKeyAsString() + "\ndoc_count:" + b.getDocCount()));
}
(4) range
按字段的值範圍進行分組, 比如按電視價格範圍分組. range只能作用於數值類型.
/**
 * Runs a range aggregation named "price_range_aggs" on "price" with three
 * ranges (below 1000, 1000 to 3000, 3000 and above) and prints each bucket's
 * key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void rangeAggs(RestHighLevelClient client) throws IOException {
    RangeAggregationBuilder priceRanges = AggregationBuilders.range("price_range_aggs")
            .field("price")
            .addUnboundedTo(1000)
            .addRange(1000, 3000)
            .addUnboundedFrom(3000);

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0); // aggregation result only, no hits
    source.aggregation(priceRanges);

    SearchRequest request = new SearchRequest("sales");
    request.source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    Range result = response.getAggregations().get("price_range_aggs");
    for (Range.Bucket priceBucket : result.getBuckets()) {
        System.out.println("key:" + priceBucket.getKeyAsString() + "\ndoc_count:" + priceBucket.getDocCount());
    }
}
(5) date_range
按字段的值範圍進行分組, 比如按sold_date範圍分組. date_range只能作用於date類型.
/**
 * Runs a date_range aggregation named "date_range_aggs" on "sold_date" with
 * three keyed ranges ("start", "middle", "end") and prints each bucket's key
 * and document count.
 *
 * NOTE(review): "start" is open-ended upward from 2019-10-31 and "end" is
 * open-ended downward from now, so the three ranges overlap. Confirm whether
 * the bounds of "start" and "end" were meant to be the other way round.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void dateRangeAggs(RestHighLevelClient client) throws IOException {
    DateRangeAggregationBuilder soldDateRanges = AggregationBuilders.dateRange("date_range_aggs")
            .field("sold_date")
            .addUnboundedFrom("start", "2019-10-31")
            .addRange("middle", "2019-10-31", "2020-01-01")
            .addUnboundedTo("end", "now");

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(soldDateRanges);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Range result = response.getAggregations().get("date_range_aggs");
    result.getBuckets().forEach(b ->
            System.out.println("key:" + b.getKeyAsString() + "\ndoc_count:" + b.getDocCount()));
}
(6) histogram
構建一個直方圖, 如按照價格區間分組. histogram只能作用於數值類型.
/**
 * Runs a histogram aggregation named "histogram_aggs" on "price" with an
 * interval of 2000 and prints each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void histogramAggs(RestHighLevelClient client) throws IOException {
    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0); // aggregation result only, no hits
    source.aggregation(AggregationBuilders.histogram("histogram_aggs").field("price").interval(2000));

    SearchRequest request = new SearchRequest("sales").source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    Histogram priceHistogram = response.getAggregations().get("histogram_aggs");
    for (Histogram.Bucket bar : priceHistogram.getBuckets()) {
        System.out.println("key:" + bar.getKeyAsString() + "\ndoc_count:" + bar.getDocCount());
    }
}
(7) date_histogram
構建一個直方圖, 如按照sold_date區間分組. date_histogram只能作用於date類型.
/**
 * Runs a monthly date_histogram aggregation named "date_histogram_aggs" on
 * "sold_date" (keys formatted as yyyy-MM-dd, min_doc_count 0, extended bounds
 * 2019-05-01 to 2020-02-01) and prints each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void dateHistogramAggs(RestHighLevelClient client) throws IOException {
    DateHistogramAggregationBuilder monthly = AggregationBuilders.dateHistogram("date_histogram_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd")
            .minDocCount(0)
            .extendedBounds(new ExtendedBounds("2019-05-01", "2020-02-01"));

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(monthly);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Histogram byMonth = response.getAggregations().get("date_histogram_aggs");
    byMonth.getBuckets().forEach(b ->
            System.out.println("key:" + b.getKeyAsString() + "\ndoc_count:" + b.getDocCount()));
}
(8) filter
過濾分組, 將滿足條件的數據分爲一組. 比如分析最近6個月電視銷售的平均價格.
/**
 * Runs a filter aggregation named "filter_aggs" that keeps documents whose
 * "sold_date" is at or after "now-6M", with an "avg_price" sub-aggregation on
 * "price", and prints the filtered document count and the average.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void filterAggs(RestHighLevelClient client) throws IOException {
    // Filter bucket with the average price computed inside it.
    FilterAggregationBuilder recentSales = AggregationBuilders
            .filter("filter_aggs", new RangeQueryBuilder("sold_date").gte("now-6M"))
            .subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(recentSales);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Filter filtered = response.getAggregations().get("filter_aggs");
    Avg average = filtered.getAggregations().get("avg_price");
    System.out.println("doc_count: " + filtered.getDocCount() + "\navg_price: " + average.getValue());
}
Metric Aggregation
Metric Aggregation是一系列數學運算, 可以對文檔字段進行統計分析, 類似於SQL語句分組後的統計功能.
(1) main方法
/**
 * Entry point for the metric and nested (bucket + metric) aggregation
 * examples, executed against the node at localhost:9200.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if a request fails or the client cannot be closed
 */
public static void main(String[] args) throws IOException {
    // try-with-resources guarantees the client is closed even when one of
    // the examples throws; previously a failure skipped client.close().
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {
        baseMetricAggs(client);
        cadinalityAggs(client);
        statAggs(client);
        topHitsAggs(client);
        percentilesAggs(client);
        percentilesRanksAggs(client);
        singleNestAggs(client);
        multiNestAggs(client);
    }
}
(2) count/min/max/sum/avg
從上文的Bucket Aggregation實例中我們知道, 當進行Bucket Aggregation時, 默認會生成一個doc_count, 這個就是Count Aggregation.
min/max/sum/avg實例:
/**
 * Runs min, max, sum and avg aggregations on the "price" field of the "sales"
 * index in one request and prints the four values.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void baseMetricAggs(RestHighLevelClient client) throws IOException {
    // All four metrics go into one search; size(0) requests no hits.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(AggregationBuilders.min("min_price").field("price"))
            .aggregation(AggregationBuilders.max("max_price").field("price"))
            .aggregation(AggregationBuilders.sum("total_sales").field("price"))
            .aggregation(AggregationBuilders.avg("avg_price").field("price"));
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Max highest = response.getAggregations().get("max_price");
    System.out.println("max_price: " + highest.getValue());
    Min lowest = response.getAggregations().get("min_price");
    System.out.println("min_price: " + lowest.getValue());
    Avg average = response.getAggregations().get("avg_price");
    System.out.println("avg_price: " + average.getValue());
    Sum total = response.getAggregations().get("total_sales");
    System.out.println("total_sales: " + total.getValue());
}
(3) cardinality
類似於SQL裏的distinct count. 比如統計售出的電視品牌數.
/**
 * Runs a cardinality aggregation named "cardinality_aggs" on "brand.keyword"
 * and prints the resulting value.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void cadinalityAggs(RestHighLevelClient client) throws IOException {
    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0); // aggregation result only, no hits
    source.aggregation(AggregationBuilders.cardinality("cardinality_aggs").field("brand.keyword"));

    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Cardinality distinctBrands = response.getAggregations().get("cardinality_aggs");
    System.out.println("cardinality_aggs: " + distinctBrands.getValue());
}
(4) stats
統計count, min, max, sum, avg.
/**
 * Runs a stats aggregation named "stat_price_aggs" on "price" and prints the
 * max, min, avg and sum it returns.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void statAggs(RestHighLevelClient client) throws IOException {
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(AggregationBuilders.stats("stat_price_aggs").field("price"));

    SearchRequest request = new SearchRequest("sales");
    request.source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    Stats priceStats = response.getAggregations().get("stat_price_aggs");
    System.out.println("max_price: " + priceStats.getMax());
    System.out.println("min_price: " + priceStats.getMin());
    System.out.println("avg_price: " + priceStats.getAvg());
    System.out.println("total_sales: " + priceStats.getSum());
}
(5) top_hits
返回排在前面的結果, 與sort聯用. 比如統計前三銷售價格.
/**
 * Runs a top_hits aggregation named "top_hits_aggs" returning three documents
 * sorted by "price" descending, and prints each document's source.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void topHitsAggs(RestHighLevelClient client) throws IOException {
    TopHitsAggregationBuilder topThree = AggregationBuilders.topHits("top_hits_aggs")
            .size(3)
            .sort("price", SortOrder.DESC);

    // Outer size(0): the documents come from the aggregation, not the hits.
    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(topThree);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    TopHits result = response.getAggregations().get("top_hits_aggs");
    SearchHit[] documents = result.getHits().getHits();
    for (int i = 0; i < documents.length; i++) {
        System.out.println(documents[i].getSourceAsString());
    }
}
(6) percentiles
percentiles表示觀察值在某個百分比的最大值. 比如統計50%, 90% 和 99%的電視的最大價格.
/**
 * Runs a percentiles aggregation named "percentile_aggs" on "price" for the
 * 50th, 90th and 99th percentiles and prints each "percent: value" pair.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void percentilesAggs(RestHighLevelClient client) throws IOException {
    PercentilesAggregationBuilder pricePercentiles = AggregationBuilders.percentiles("percentile_aggs")
            .field("price")
            .percentiles(50, 90, 99);

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(pricePercentiles);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Percentiles result = response.getAggregations().get("percentile_aggs");
    result.forEach(p -> System.out.println(p.getPercent() + ": " + p.getValue()));
}
(7) percentile_ranks
percentile_ranks表示觀察值低於一定值的百分比. 比如統計價格在2000以內, 5000以內, 10000以內的電視所佔比例.
/**
 * Runs a percentile_ranks aggregation named "percentile_ranks" on "price" for
 * the values 2000, 5000 and 10000 and prints each "value: percent" pair.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void percentilesRanksAggs(RestHighLevelClient client) throws IOException {
    double[] priceThresholds = {2000, 5000, 10000};

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0); // aggregation result only, no hits
    source.aggregation(AggregationBuilders.percentileRanks("percentile_ranks", priceThresholds).field("price"));

    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    PercentileRanks ranks = response.getAggregations().get("percentile_ranks");
    ranks.forEach(r -> System.out.println(r.getValue() + ": " + r.getPercent()));
}
Bucket + Metric Aggregation
Bucket 聚合分析允許通過添加子聚合分析來進一步分析, 子聚合分析可以是Bucket, 也可以是Metric.
(1) 一層嵌套, 如按品牌分組, 統計價格信息
/**
 * Runs a terms aggregation "group_by_brand" on "brand.keyword" with a
 * "stats_price" stats sub-aggregation on "price", and prints count, max, min,
 * avg and sum for every brand bucket.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void singleNestAggs(RestHighLevelClient client) throws IOException {
    // One level of nesting: price statistics inside each brand bucket.
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.stats("stats_price").field("price"));

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byBrand);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brands.getBuckets()) {
        Stats priceStats = brand.getAggregations().get("stats_price");
        System.out.println("count: " + priceStats.getCount());
        System.out.println("max: " + priceStats.getMax());
        System.out.println("min: " + priceStats.getMin());
        System.out.println("avg: " + priceStats.getAvg());
        System.out.println("total: " + priceStats.getSum());
    }
}
(2) 多層嵌套, 如先按品牌分組, 然後按顏色分組, 統計價格信息
/**
 * Runs a terms aggregation "group_by_brand" containing a terms aggregation
 * "group_by_color", which in turn contains a "stats_price" stats aggregation
 * on "price"; prints count, max, min, avg and sum for every brand/color
 * bucket.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void multiNestAggs(RestHighLevelClient client) throws IOException {
    // Innermost level: price statistics per color.
    TermsAggregationBuilder byColor = AggregationBuilders.terms("group_by_color").field("color.keyword");
    byColor.subAggregation(AggregationBuilders.stats("stats_price").field("price"));
    // Outer level: brand buckets, each holding the color buckets.
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(byColor);

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byBrand);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brands.getBuckets()) {
        Terms colors = brand.getAggregations().get("group_by_color");
        for (Terms.Bucket color : colors.getBuckets()) {
            Stats priceStats = color.getAggregations().get("stats_price");
            System.out.println("count: " + priceStats.getCount());
            System.out.println("max: " + priceStats.getMax());
            System.out.println("min: " + priceStats.getMin());
            System.out.println("avg: " + priceStats.getAvg());
            System.out.println("total: " + priceStats.getSum());
        }
    }
}
Pipeline Aggregation
對聚合分析的結果再次進行聚合分析.
(1) main方法
/**
 * Entry point for the pipeline-aggregation examples, executed against the
 * node at localhost:9200.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if a request fails or the client cannot be closed
 */
public static void main(String[] args) throws IOException {
    // try-with-resources guarantees the client is closed even when one of
    // the examples throws; previously a failure skipped client.close().
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {
        minBucketAggs(client);
        statBucketAggs(client);
        percentilesBucketAggs(client);
        derivativeAggs(client);
        cumulativeSumAggs(client);
        moveFnAggs(client);
    }
}
(2) min_bucket
平均價格最低的品牌
/**
 * Computes the average "price" per brand ("group_by_brand" &gt; "avg_price") and
 * a sibling min_bucket pipeline aggregation "min_price_by_brand" over those
 * averages. Prints every brand bucket, then the pipeline value and its keys.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void minBucketAggs(RestHighLevelClient client) throws IOException {
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.avg("avg_price").field("price"));
    // Sibling pipeline aggregation addressed through its buckets path.
    MinBucketPipelineAggregationBuilder lowestAverage =
            PipelineAggregatorBuilders.minBucket("min_price_by_brand", "group_by_brand>avg_price");

    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(byBrand)
            .aggregation(lowestAverage);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brands.getBuckets()) {
        Avg average = brand.getAggregations().get("avg_price");
        System.out.println("key:" + brand.getKeyAsString() + "\ndoc_count:" + brand.getDocCount() + "\navg_price:" + average.getValue());
    }
    System.out.println();
    BucketMetricValue minimum = response.getAggregations().get("min_price_by_brand");
    System.out.println("value: " + minimum.getValueAsString() + "\nkeys:" + Arrays.toString(minimum.keys()));
}
(2) stats_bucket
所有品牌電視的平均價格的統計分析.
/**
 * Computes the average "price" per brand ("group_by_brand" &gt; "avg_price") and
 * a sibling stats_bucket pipeline aggregation "stats_price_by_brand" over
 * those averages. Prints every brand bucket, then count, max, min, avg and
 * sum of the pipeline result.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void statBucketAggs(RestHighLevelClient client) throws IOException {
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.avg("avg_price").field("price"));
    // Sibling pipeline aggregation addressed through its buckets path.
    StatsBucketPipelineAggregationBuilder averageStats =
            PipelineAggregatorBuilders.statsBucket("stats_price_by_brand", "group_by_brand>avg_price");

    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(byBrand)
            .aggregation(averageStats);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brands.getBuckets()) {
        Avg average = brand.getAggregations().get("avg_price");
        System.out.println("key:" + brand.getKeyAsString() + "\ndoc_count:" + brand.getDocCount() + "\navg_price:" + average.getValue());
    }
    System.out.println();
    StatsBucket summary = response.getAggregations().get("stats_price_by_brand");
    System.out.println("count: " + summary.getCount());
    System.out.println("max: " + summary.getMax());
    System.out.println("min: " + summary.getMin());
    System.out.println("avg: " + summary.getAvg());
    System.out.println("total: " + summary.getSum());
}
(3) percentiles_bucket
所有品牌電視的平均價格的百分比統計
/**
 * Computes the average "price" per brand ("group_by_brand" &gt; "avg_price") and
 * a sibling percentiles_bucket pipeline aggregation
 * "percentiles_price_by_brand" (50, 90, 99) over those averages. Prints every
 * brand bucket, then each "percent: value" pair.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void percentilesBucketAggs(RestHighLevelClient client) throws IOException {
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("group_by_brand").field("brand.keyword");
    byBrand.subAggregation(AggregationBuilders.avg("avg_price").field("price"));
    // Sibling pipeline aggregation addressed through its buckets path.
    double[] requestedPercents = {50, 90, 99};
    PercentilesBucketPipelineAggregationBuilder averagePercentiles = PipelineAggregatorBuilders
            .percentilesBucket("percentiles_price_by_brand", "group_by_brand>avg_price")
            .setPercents(requestedPercents);

    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(byBrand)
            .aggregation(averagePercentiles);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms brands = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brands.getBuckets()) {
        Avg average = brand.getAggregations().get("avg_price");
        System.out.println("key:" + brand.getKeyAsString() + "\ndoc_count:" + brand.getDocCount() + "\navg_price:" + average.getValue());
    }
    System.out.println();
    ParsedPercentilesBucket result = response.getAggregations().get("percentiles_price_by_brand");
    result.forEach(p -> System.out.println(p.getPercent() + ": " + p.getValue()));
}
(4) derivative
對按照sold_date分組的平均價格求導
/**
 * Groups sales by month of "sold_date" ("sold_date_aggs"), computes the
 * average "price" per month ("avg_price_aggs") and the derivative of that
 * average between consecutive months ("derivative_avg_price_aggs"). Prints
 * each bucket and, when present, its derivative.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void derivativeAggs(RestHighLevelClient client) throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.size(0);
    // Monthly buckets keyed as yyyy-MM-dd.
    DateHistogramAggregationBuilder dateHistogramAggregationBuilder = AggregationBuilders.dateHistogram("sold_date_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd");
    // Metric the derivative is computed from.
    AvgAggregationBuilder avgAggregationBuilder = AggregationBuilders.avg("avg_price_aggs").field("price");
    dateHistogramAggregationBuilder.subAggregation(avgAggregationBuilder);
    // Parent pipeline aggregation: lives inside the histogram and refers to
    // the sibling metric by name.
    DerivativePipelineAggregationBuilder derivativePipelineAggregationBuilder =
            PipelineAggregatorBuilders.derivative("derivative_avg_price_aggs", "avg_price_aggs");
    dateHistogramAggregationBuilder.subAggregation(derivativePipelineAggregationBuilder);
    searchSourceBuilder.aggregation(dateHistogramAggregationBuilder);
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    Histogram histogramAggs = searchResponse.getAggregations().get("sold_date_aggs");
    List<? extends Histogram.Bucket> buckets = histogramAggs.getBuckets();
    for (Histogram.Bucket bucket : buckets) {
        Avg avgPriceAggs = bucket.getAggregations().get("avg_price_aggs");
        System.out.println("key:" + bucket.getKeyAsString() + "\ndoc_count:" + bucket.getDocCount() + "\navg_price:" + avgPriceAggs.getValue());
        // The lookup can return null for a bucket without a derivative, so guard it.
        Derivative derivativeAvgPriceAggs = bucket.getAggregations().get("derivative_avg_price_aggs");
        if (derivativeAvgPriceAggs != null) {
            // No unit is configured on the derivative, so the response carries
            // no normalized value; print the plain derivative instead of
            // normalizedValue().
            System.out.println(derivativeAvgPriceAggs.value());
        }
    }
}
(5) cumulative_sum
對按照sold_date分組的平均價格累計求和.
/**
 * Groups sales by month of "sold_date" ("sold_date_aggs"), computes the
 * average "price" per month ("avg_price_aggs") and a cumulative_sum pipeline
 * aggregation over that average ("cumulative_sum_avg_price_aggs"). Prints each
 * bucket and, when present, its cumulative sum.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void cumulativeSumAggs(RestHighLevelClient client) throws IOException {
    DateHistogramAggregationBuilder byMonth = AggregationBuilders.dateHistogram("sold_date_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd");
    byMonth.subAggregation(AggregationBuilders.avg("avg_price_aggs").field("price"));
    // Parent pipeline aggregation: lives inside the histogram and refers to
    // the sibling metric by name.
    CumulativeSumPipelineAggregationBuilder runningTotal =
            PipelineAggregatorBuilders.cumulativeSum("cumulative_sum_avg_price_aggs", "avg_price_aggs");
    byMonth.subAggregation(runningTotal);

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byMonth);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Histogram months = response.getAggregations().get("sold_date_aggs");
    for (Histogram.Bucket month : months.getBuckets()) {
        Avg average = month.getAggregations().get("avg_price_aggs");
        System.out.println("key:" + month.getKeyAsString() + "\ndoc_count:" + month.getDocCount() + "\navg_price:" + average.getValue());
        ParsedSimpleValue cumulative = month.getAggregations().get("cumulative_sum_avg_price_aggs");
        if (cumulative == null) {
            continue; // nothing to report for this bucket
        }
        System.out.println("cumulative_sum_avg_price_aggs: " + cumulative.getValueAsString());
    }
}
(6) moving_fn
對按照sold_date分組的平均價格, 按時間窗口求最小平均價格
/**
 * Groups sales by month of "sold_date" ("sold_date_aggs"), computes the
 * average "price" per month ("avg_price_aggs") and a moving_fn pipeline
 * aggregation ("moving_fn_avg_price_aggs") that applies
 * MovingFunctions.min(values) over a window of 10. Prints each bucket and,
 * when present, its moving-function value.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void moveFnAggs(RestHighLevelClient client) throws IOException {
    DateHistogramAggregationBuilder byMonth = AggregationBuilders.dateHistogram("sold_date_aggs")
            .field("sold_date")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM-dd");
    byMonth.subAggregation(AggregationBuilders.avg("avg_price_aggs").field("price"));
    // Parent pipeline aggregation: lives inside the histogram and refers to
    // the sibling metric by name.
    MovFnPipelineAggregationBuilder movingMin = PipelineAggregatorBuilders.movingFunction(
            "moving_fn_avg_price_aggs", new Script("MovingFunctions.min(values)"), "avg_price_aggs", 10);
    byMonth.subAggregation(movingMin);

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(byMonth);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Histogram months = response.getAggregations().get("sold_date_aggs");
    for (Histogram.Bucket month : months.getBuckets()) {
        Avg average = month.getAggregations().get("avg_price_aggs");
        System.out.println("key:" + month.getKeyAsString() + "\ndoc_count:" + month.getDocCount() + "\navg_price:" + average.getValue());
        ParsedSimpleValue moving = month.getAggregations().get("moving_fn_avg_price_aggs");
        if (moving == null) {
            continue; // nothing to report for this bucket
        }
        System.out.println("moving_fn_avg_price_aggs: " + moving.getValueAsString());
    }
}
Matrix Aggregation
支持對多個字段進行統計分析, 並爲每個字段提供一個結果矩陣.
/**
 * Runs a matrix_stats aggregation named "statistics" over the "price" field
 * and prints count, mean, variance, skewness, kurtosis, and the covariance and
 * correlation of "price" with itself.
 *
 * NOTE(review): this method shares its name with the moving-function example
 * although it performs a matrix_stats aggregation; the name looks like a
 * copy-paste leftover. Confirm and rename together with any callers.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void moveFnAggs(RestHighLevelClient client) throws IOException {
    List<String> analysedFields = new ArrayList<>();
    analysedFields.add("price");

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0); // aggregation result only, no hits
    source.aggregation(MatrixStatsAggregationBuilders.matrixStats("statistics").fields(analysedFields));

    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    ParsedMatrixStats matrix = response.getAggregations().get("statistics");
    System.out.println("count: " + matrix.getFieldCount("price"));
    System.out.println("mean: " + matrix.getMean("price"));
    System.out.println("variance: " + matrix.getVariance("price"));
    System.out.println("skewness: " + matrix.getSkewness("price"));
    System.out.println("kurtosis: " + matrix.getKurtosis("price"));
    System.out.println("covariance: " + matrix.getCovariance("price","price"));
    System.out.println("correlation: " + matrix.getCorrelation("price", "price"));
}
聚合的作用範圍及排序
Elasticsearch聚合分析的默認作用範圍是query的查詢結果集, 同時Elasticsearch還支持以下方式改變聚合的作用範圍.
- filter
- post_filter
- global
(1) main方法
/**
 * Entry point for the aggregation scope and ordering examples, executed
 * against the node at localhost:9200.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if a request fails or the client cannot be closed
 */
public static void main(String[] args) throws IOException {
    // try-with-resources guarantees the client is closed even when one of
    // the examples throws; previously a failure skipped client.close().
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {
        query(client);
        filter(client);
        postFilter(client);
        global(client);
        orderAggs(client);
        orderSubAggs(client);
    }
}
(2) query
先使用query進行過濾, 然後再進行聚合操作.
/**
 * Restricts the search to documents with "price" &gt;= 2000 via a range query,
 * then runs a terms aggregation "group_by_brand" on "brand.keyword" and prints
 * each bucket's key and document count.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void query(RestHighLevelClient client) throws IOException {
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .query(QueryBuilders.rangeQuery("price").gte(2000))
            .aggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"));
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms brands = response.getAggregations().get("group_by_brand");
    brands.getBuckets().forEach(b ->
            System.out.println("key: " + b.getKeyAsString() + "\ndoc_count: " + b.getDocCount()));
}
(3) filter
聚合內的filter只對當前的子聚合語句生效.
/**
 * Sends two top-level aggregations in one request: a filter aggregation
 * "price_filter" ("price" &gt;= 2000) containing a terms aggregation
 * "group_by_brand", and an unfiltered terms aggregation "group_by_all_brand".
 * Prints the unfiltered brand buckets first, then the filtered ones.
 *
 * @param client client used to execute the search
 * @throws IOException if the search request fails
 */
private static void filter(RestHighLevelClient client) throws IOException {
    // Brand buckets nested inside the price filter.
    FilterAggregationBuilder expensive =
            AggregationBuilders.filter("price_filter", QueryBuilders.rangeQuery("price").gte(2000));
    expensive.subAggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"));
    // Brand buckets outside the filter.
    TermsAggregationBuilder everyBrand = AggregationBuilders.terms("group_by_all_brand").field("brand.keyword");

    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(expensive)
            .aggregation(everyBrand);
    SearchResponse response = client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    Terms unfiltered = response.getAggregations().get("group_by_all_brand");
    for (Terms.Bucket brand : unfiltered.getBuckets()) {
        System.out.println("key: " + brand.getKeyAsString() + "\ndoc_count: " + brand.getDocCount());
    }
    Filter priceFilter = response.getAggregations().get("price_filter");
    Terms filtered = priceFilter.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : filtered.getBuckets()) {
        System.out.println("key: " + brand.getKeyAsString() + "\ndoc_count: " + brand.getDocCount());
    }
}
(4) post_filter
post_filter在聚合計算完成之後才對搜索結果(hits)進行過濾, 不會影響聚合結果; size不要設置爲0, 否則看不到過濾後的文檔.
/**
 * Searches the {@code sales} index with a terms aggregation {@code group_by_brand} on
 * {@code brand.keyword} plus a post filter (term query {@code brand.keyword = "小米"}).
 * Prints every aggregation bucket first and then the source of every returned hit.
 *
 * <p>No explicit size is set on the request, and the returned hits are printed.
 *
 * @param client high-level REST client used to send the search request
 * @throws IOException propagated from {@code client.search}
 */
private static void postFilter(RestHighLevelClient client) throws IOException {
    // Aggregation plus post filter, assembled in one fluent chain.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .aggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"))
            .postFilter(QueryBuilders.termQuery("brand.keyword", "小米"));
    SearchRequest request = new SearchRequest("sales").source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    // Aggregation buckets.
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brandTerms.getBuckets()) {
        System.out.println("key: " + brand.getKeyAsString() + "\ndoc_count: " + brand.getDocCount());
    }

    // Hits returned with the response.
    for (SearchHit hit : response.getHits().getHits()) {
        System.out.println(hit.getSourceAsString());
    }
}
(5) global
global無視query過濾, 對全部文檔進行統計.
/**
 * Searches the {@code sales} index with a range query ({@code price >= 2000}) and two
 * top-level aggregations: the terms aggregation {@code group_by_brand} on
 * {@code brand.keyword}, and the global aggregation {@code all_price_aggs} holding an
 * average sub-aggregation {@code avg_price} on {@code price}.
 *
 * <p>Prints the brand buckets, then the global aggregation's document count and the
 * average price.
 *
 * @param client high-level REST client used to send the search request
 * @throws IOException propagated from {@code client.search}
 */
private static void global(RestHighLevelClient client) throws IOException {
    // Global aggregation with the average-price sub-aggregation attached.
    GlobalAggregationBuilder allPrices = AggregationBuilders
            .global("all_price_aggs")
            .subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    // Registration order is kept: terms aggregation first, then the global aggregation.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .query(QueryBuilders.rangeQuery("price").gte(2000))
            .aggregation(AggregationBuilders.terms("group_by_brand").field("brand.keyword"))
            .aggregation(allPrices);
    SearchResponse response =
            client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    // Brand buckets.
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brandTerms.getBuckets()) {
        System.out.println("key: " + brand.getKeyAsString() + "\ndoc_count: " + brand.getDocCount());
    }

    // Global aggregation: document count plus the nested average.
    Global globalResult = response.getAggregations().get("all_price_aggs");
    Avg averagePrice = globalResult.getAggregations().get("avg_price");
    System.out.println("\ndoc_count: " + globalResult.getDocCount() + "\navg_price: " + averagePrice.getValue());
}
排序
通過order字段來指定排序, 默認情況下根據doc_count降序排列.
(1) 根據doc_count升序, key降序來排序
/**
 * Searches the {@code sales} index with a terms aggregation {@code group_by_brand} on
 * {@code brand.keyword} whose buckets are ordered by two criteria, in this sequence:
 * {@code BucketOrder.count(true)} and then {@code BucketOrder.key(false)}.
 * Prints the key and document count of every returned bucket.
 *
 * @param client high-level REST client used to send the search request
 * @throws IOException propagated from {@code client.search}
 */
private static void orderAggs(RestHighLevelClient client) throws IOException {
    // Sort criteria in priority order: count first, key second.
    List<BucketOrder> sortCriteria = new ArrayList<>();
    sortCriteria.add(BucketOrder.count(true));
    sortCriteria.add(BucketOrder.key(false));

    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .aggregation(AggregationBuilders.terms("group_by_brand")
                    .field("brand.keyword")
                    .order(sortCriteria));
    SearchRequest request = new SearchRequest("sales").source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    // Buckets are printed in the order the response delivers them.
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    brandTerms.getBuckets().forEach(bucket ->
            System.out.println("key: " + bucket.getKeyAsString() + "\ndoc_count: " + bucket.getDocCount()));
}
(2) 基於子聚合的值排序
/**
 * Searches the {@code sales} index with a terms aggregation {@code group_by_brand} on
 * {@code brand.keyword} that carries an average sub-aggregation {@code avg_price} on
 * {@code price} and is ordered with {@code BucketOrder.aggregation("avg_price", false)}.
 * Prints key, document count and average price for every returned bucket.
 *
 * @param client high-level REST client used to send the search request
 * @throws IOException propagated from {@code client.search}
 */
private static void orderSubAggs(RestHighLevelClient client) throws IOException {
    // Terms aggregation ordered by the value of its own "avg_price" sub-aggregation.
    TermsAggregationBuilder brandsByAvgPrice = AggregationBuilders.terms("group_by_brand")
            .field("brand.keyword")
            .order(BucketOrder.aggregation("avg_price", false))
            .subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    SearchSourceBuilder source = new SearchSourceBuilder().size(0).aggregation(brandsByAvgPrice);
    SearchResponse response =
            client.search(new SearchRequest("sales").source(source), RequestOptions.DEFAULT);

    // Each bucket exposes its own avg_price result.
    Terms brandTerms = response.getAggregations().get("group_by_brand");
    for (Terms.Bucket brand : brandTerms.getBuckets()) {
        Avg averagePrice = brand.getAggregations().get("avg_price");
        System.out.println("key: " + brand.getKeyAsString() + "\ndoc_count: " + brand.getDocCount() + "\navg_price: " + averagePrice.getValue());
    }
}