目錄
改寫SQL實現
使用 GROUPING SETS 代替 UNION ALL
-- Before optimization: two aggregations over student_tb_orc combined with UNION ALL.
--   Branch 1: row count per (s_age, s_sex).
--   Branch 2: row count per s_age only; s_sex is padded with NULL so both
--             branches expose the same three columns.
-- Branch 2 must group by s_age alone. Grouping it by (s_age, s_sex) would just
-- repeat branch 1 with s_sex blanked out instead of producing per-age subtotals.
SELECT s_age,s_sex,count(1)
FROM student_tb_orc
GROUP BY s_age,s_sex
UNION ALL
SELECT s_age,null,count(1)
FROM student_tb_orc
GROUP BY s_age;
-- After optimization: both aggregation levels are requested in one statement.
-- GROUPING SETS lists the levels to compute: per s_age, and per (s_age, s_sex).
-- For the per-s_age level, s_sex is returned as NULL.
SELECT
    s_age,
    s_sex,
    COUNT(1)
FROM student_tb_orc
GROUP BY
    s_age,
    s_sex
GROUPING SETS (
    (s_age),
    (s_age, s_sex)
);
分解count(distinct)
-- Original code: distinct user_id and distinct cuid counts for one event
-- action on one day (${DAY} is substituted before the query runs).
SELECT COUNT(DISTINCT user_id),COUNT(DISTINCT cuid)
FROM udw.udw_event
WHERE event_action='searchbox_mobile_app_operate'
AND event_day='${DAY}';
-- Main problem with the code above: COUNT(DISTINCT) runs on only one reducer,
-- which becomes a single-point bottleneck.
-- Rewritten code: de-duplicate (cuid, user_id) pairs in a subquery first, then
-- count distinct values over the reduced row set.
-- Request 300 reduce tasks for the job.
SET mapred.reduce.tasks=300;
SELECT COUNT(DISTINCT user_id),COUNT(DISTINCT cuid)
FROM
(
-- Every selected column must appear in GROUP BY; grouping by both keys
-- collapses the input to one row per (cuid, user_id) pair.
SELECT cuid,user_id
FROM udw.udw_event
WHERE event_action='searchbox_mobile_app_operate'
AND event_day='${DAY}'
GROUP BY cuid,user_id
) tmp_t;
使用SQL-Hint 語法
MAPJOIN Hint
STREAMTABLE
Hive配置開關
並行執行
向量開關
參考文章
1. https://blog.csdn.net/hysfwjr/article/details/104071583
2.《Hive性能調優實戰》,林志煌