Hive分組取Top N
Hive 從 0.11.0 版本開始加入了 row_number、rank、dense_rank 等分析函數,可以用來查詢分組排序後的 Top N 值。
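使用規則: 分析函數必須搭配 over 子句使用,語法為 函數名() over (partition by 分組欄位 order by 排序欄位 [asc|desc]);partition by 決定如何分組,order by 決定組內的排序方式。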
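區別: row_number() 即使分數相同也會給出連續且不重複的序號(例如 1、2、3、4);rank() 分數相同時名次相同,之後的名次會跳號(例如 1、2、2、4);dense_rank() 分數相同時名次相同,且之後的名次不跳號(例如 1、2、2、3)。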
實際操作
-- Demo table used by every query in this note.
-- Columns: name = whose score it is, sub = subject, score = integer score.
-- Rows are stored as delimited text with TAB as the field separator.
create table t (
    name  string,
    sub   string,
    score int
)
row format delimited
fields terminated by '\t';
-- Sample contents of /home/hadoop/hive-example/a.txt, one row per line.
-- The three fields (name, sub, score) must be separated by a single TAB
-- character in the real file, matching the table's field delimiter:
--
--   a   chinese   98
--   a   english   90
--   d   chinese   88
--   c   english   82
--   c   math      98
--   b   math      89
--   b   chinese   79
--   z   english   90
--   z   math      89
--   z   chinese   80
--   e   math      99
--   e   english   87
--   d   english   90

-- Load the data from the local file system into the demo table.
-- The target must be `t` (the table created above and used by every query
-- below); the original text loaded into an unrelated table name.
load data local inpath '/home/hadoop/hive-example/a.txt' into table t;
-- row_number: number the rows of each subject by ascending score.
-- Every row gets a distinct, consecutive number, even when scores tie.
select
    name,
    sub,
    score,
    row_number() over (partition by sub order by score) as od
from t;

-- rank: rank the rows of each subject by ascending score.
-- Tied scores share a rank and the following rank is skipped (1, 2, 2, 4).
select
    name,
    sub,
    score,
    rank() over (partition by sub order by score) as od
from t;

-- dense_rank: rank the rows of each subject by descending score.
-- Tied scores share a rank and no rank is skipped (1, 2, 2, 3).
-- The column is aliased as `od` to match the two queries above.
select
    name,
    sub,
    score,
    dense_rank() over (partition by sub order by score desc) as od
from t;
業務實例
-- Top three scores in every subject.
-- row_number() guarantees at most three rows per subject; `name` is added
-- as a tiebreaker so the result is reproducible when scores are equal.
-- The derived table is aliased `ranked` so it no longer shadows table `t`.
select
    name,
    sub,
    score,
    od
from (
    select
        name,
        sub,
        score,
        row_number() over (partition by sub order by score desc, name) as od
    from t
) ranked
where od <= 3;

-- What rank does a score of 80 hold in the `chinese` subject?
-- rank() is used instead of row_number() so that equal scores receive the
-- same rank rather than arbitrary, different row numbers.
select
    od
from (
    select
        sub,
        score,
        rank() over (partition by sub order by score desc) as od
    from t
) ranked
where sub = 'chinese'
  and score = 80;

-- Pagination: fetch rows 1 through 5.
-- An explicit ordering is required; with an empty over () the numbering is
-- arbitrary and a page may contain different rows on every run.
-- NOTE(review): (name, sub) is assumed to identify a row uniquely, as it
-- does in the sample data; confirm, or add `score` to the ordering.
select
    name,
    sub,
    score,
    rn
from (
    select
        name,
        sub,
        score,
        row_number() over (order by name, sub) as rn
    from t
) numbered
where rn between 1 and 5;