Hive高级聚合(0.10开始支持)
高级聚合功能相当于group by 加强
grouping sets:多个group by 进行union all,在同一个数据集进行多重group by
该关键字可以实现同一数据集的多重group by操作。事实上GROUPING SETS是多个GROUP BY进行UNION ALL操作的简单表达,它仅仅使用一个stage完成这些操作。GROUPING SETS的子句中如果包含()数据集,则表示整体聚合。
-- Count employees for three grouping sets in a single statement:
--   (name, work_space[0]) -> one row per name / work_space[0] pair
--   name                  -> one row per name (main_place is NULL)
--   ()                    -> one grand-total row (name and main_place are NULL)
-- The aggregate and aliases match the UNION ALL form this query is
-- stated to be equivalent to.
select
    name,
    work_space[0] as main_place,
    count(employee_id) as emp_id_cnt
from employee
group by name, work_space[0]
grouping sets ((name, work_space[0]), name, ());
-- 上面语句与下面语句等效
-- UNION ALL expansion of
--   group by name, work_space[0] grouping sets ((name, work_space[0]), name, ())
-- with one branch per grouping set. A column that is not part of a branch's
-- grouping set is emitted as NULL, matching what GROUPING SETS returns for it.

-- grouping set (name, work_space[0])
select name, work_space[0] as main_place, count(employee_id) as emp_id_cnt
from employee
group by name, work_space[0]
UNION ALL
-- grouping set (name): main_place is not grouped, so it is NULL
select name, NULL as main_place, count(employee_id) as emp_id_cnt
from employee
group by name
UNION ALL
-- grouping set (): overall aggregate, both grouping columns are NULL
select NULL as name, NULL as main_place, count(employee_id) as emp_id_cnt
from employee;
rollup
group by a,b,c with rollup <=> group by a,b,c grouping sets((a,b,c),(a,b),(a),())
cube
group by a,b,c with cube <=> group by a,b,c grouping sets((a,b,c),(a,b),(a,c),(b,c),(a),(b),(c),())
参考:https://www.jianshu.com/p/9502e1d58f45