分析函数 —— Avg移动开窗的一些思考和探索

导火索:下面的查询求出的是移动(累计)平均,即每个分组内按 amt 降序、从第一行到当前行的平均值:

-- Per-shop daily sales totals with a running average of those totals.
-- The window has an ORDER BY but no explicit frame, so the default frame
-- (partition start through the current row and its peers) applies: a1 is the
-- average of the current daily total and every higher daily total of the same shop.
WITH daily_sales AS (
    -- One row per (dimShopID, dimDateID): the summed AMT for that pair,
    -- restricted to dimDateID values 20170801 through 20170810 inclusive.
    SELECT
        s.dimShopID,
        s.dimDateID,
        SUM(s.AMT) AS amt
    FROM dw.fct_sales s
    WHERE s.dimDateID BETWEEN 20170801 AND 20170810
    GROUP BY
        s.dimShopID,
        s.dimDateID
)
SELECT
    d.dimShopID,
    d.dimDateID,
    d.amt,
    AVG(d.amt) OVER (PARTITION BY d.dimShopID ORDER BY d.amt DESC) AS a1
FROM daily_sales d
-- The ORDER BY inside OVER() only shapes the window; it does not guarantee the
-- order of the result set. Sort explicitly so rows come back shop by shop,
-- highest total first (dimDateID breaks ties deterministically).
ORDER BY
    d.dimShopID,
    d.amt DESC,
    d.dimDateID;

-- 输出结果:
#	dimShopID	dimDateID	amt	a1
1	33	20,170,803	75,431.34	75,431.34
2	33	20,170,801	65,459.27	70,445.305
3	33	20,170,804	63,718.71	68,203.106666666
4	33	20,170,802	58,404.6	65,753.48
5	33	20,170,807	56,704.89	63,943.762
6	33	20,170,806	52,233.34	61,992.025
7	33	20,170,810	46,969.19	59,845.905714285
8	33	20,170,808	46,852.21	58,221.69375
9	33	20,170,809	45,016.49	56,754.448888888
10	33	20,170,805	37,233.64	54,802.368
11	34	20,170,803	114,570.17	114,570.17
12	34	20,170,801	79,596.28	97,083.225
13	34	20,170,804	73,828.05	89,331.5
14	34	20,170,806	65,252.32	83,311.705
15	34	20,170,802	64,027.97	79,454.958
16	34	20,170,807	52,923.21	75,033
17	34	20,170,809	47,571.3	71,109.9
18	34	20,170,810	47,543.23	68,164.06625
19	34	20,170,805	44,916.38	65,580.99
20	34	20,170,808	44,691.52	63,492.043

变式1:可用于组内比较

-- 变式1:可用于组内比较

-- 应用:
-- (1)求出每个科目成绩大于该科目平均成绩的学生;
-- (2)在该份数据中,就可以用来求,每个门店销售总额高于平均总额的日期
 
-- Within-group comparison: for each shop, keep only the days whose total
-- sales amount is strictly greater than that shop's average daily total.
WITH daily_sales AS (
    -- One row per (dimShopID, dimDateID) with the summed AMT.
    SELECT
        s.dimShopID,
        s.dimDateID,
        SUM(s.AMT) AS amt
    FROM dw.fct_sales s
    WHERE s.dimDateID BETWEEN 20170801 AND 20170810
    GROUP BY
        s.dimShopID,
        s.dimDateID
),
sales_with_shop_avg AS (
    SELECT
        d.dimShopID,
        d.dimDateID,
        d.amt,
        -- No ORDER BY in the window: the average is taken over the whole
        -- partition, so every row of a shop carries the same a1.
        AVG(d.amt) OVER (PARTITION BY d.dimShopID) AS a1
    FROM daily_sales d
)
SELECT
    w.dimShopID,
    w.dimDateID,
    w.amt,
    w.a1
FROM sales_with_shop_avg w
-- Filtering on the window result needs its own query level, because the
-- row filter has to be applied after a1 has been computed.
WHERE w.amt > w.a1

-- 输出结果:

行号	dimShopID	dimDateID	amt	a1
1	33	20,170,801	65,459.27	54,802.368
2	33	20,170,802	58,404.6	54,802.368
3	33	20,170,803	75,431.34	54,802.368
4	33	20,170,804	63,718.71	54,802.368
5	33	20,170,807	56,704.89	54,802.368
6	34	20,170,801	79,596.28	63,492.043
7	34	20,170,802	64,027.97	63,492.043
8	34	20,170,803	114,570.17	63,492.043
9	34	20,170,804	73,828.05	63,492.043
10	34	20,170,806	65,252.32	63,492.043

变式2:分析函数中不使用 partition by,仅仅使用 order by

-- 变式2:分析函数中不使用 partition by,仅仅使用 order by

-- Daily sales totals per shop with a running average taken across ALL shops.
-- The window has no PARTITION BY, so the whole result is a single partition;
-- with ORDER BY amt DESC and the default frame, a1 is the average of the
-- current daily total and every higher daily total in the result.
WITH daily_sales AS (
    -- One row per (dimShopID, dimDateID): the summed AMT for that pair,
    -- restricted to dimDateID values 20170801 through 20170810 inclusive.
    SELECT
        s.dimShopID,
        s.dimDateID,
        SUM(s.AMT) AS amt
    FROM dw.fct_sales s
    WHERE s.dimDateID BETWEEN 20170801 AND 20170810
    GROUP BY
        s.dimShopID,
        s.dimDateID
)
SELECT
    d.dimShopID,
    d.dimDateID,
    d.amt,
    AVG(d.amt) OVER (ORDER BY d.amt DESC) AS a1
FROM daily_sales d
-- The ORDER BY inside OVER() does not guarantee the order of the result set;
-- sort explicitly so rows are returned from highest to lowest total
-- (shop and date break ties deterministically).
ORDER BY
    d.amt DESC,
    d.dimShopID,
    d.dimDateID;
      
-- 输出:
#	dimShopID	dimDateID	amt	a1
1	34	20,170,803	114,570.17	114,570.17
2	34	20,170,801	79,596.28	97,083.225
3	33	20,170,803	75,431.34	89,865.93
4	34	20,170,804	73,828.05	85,856.46
5	33	20,170,801	65,459.27	81,777.022
6	34	20,170,806	65,252.32	79,022.905
7	34	20,170,802	64,027.97	76,880.771428571
8	33	20,170,804	63,718.71	75,235.51375
9	33	20,170,802	58,404.6	73,365.412222222
10	33	20,170,807	56,704.89	71,699.36
11	34	20,170,807	52,923.21	69,992.437272727
12	33	20,170,806	52,233.34	68,512.5125
13	34	20,170,809	47,571.3	66,901.65
14	34	20,170,810	47,543.23	65,518.905714285
15	33	20,170,810	46,969.19	64,282.258
16	33	20,170,808	46,852.21	63,192.88
17	33	20,170,809	45,016.49	62,123.680588235
18	34	20,170,805	44,916.38	61,167.719444444
19	34	20,170,808	44,691.52	60,300.551052631
20	33	20,170,805	37,233.64	59,147.2055

VS 变式2:

-- 下面这个普通聚合查询的结果与变式2相同,变式2只是多了一个移动平均列 a1
-- 在实际应用中,这种不分区的移动平均没啥意义
     
-- Baseline for comparison: the plain daily totals per shop, sorted from the
-- highest total to the lowest, without any window-function column.
WITH daily_sales AS (
    -- One row per (dimShopID, dimDateID) with the summed AMT.
    SELECT
        s.dimShopID,
        s.dimDateID,
        SUM(s.AMT) AS amt
    FROM dw.fct_sales s
    WHERE s.dimDateID BETWEEN 20170801 AND 20170810
    GROUP BY
        s.dimShopID,
        s.dimDateID
)
SELECT
    d.dimShopID,
    d.dimDateID,
    d.amt
FROM daily_sales d
ORDER BY d.amt DESC;

-- 输出结果:
#	dimShopID	dimDateID	amt
1	34	20,170,803	114,570.17
2	34	20,170,801	79,596.28
3	33	20,170,803	75,431.34
4	34	20,170,804	73,828.05
5	33	20,170,801	65,459.27
6	34	20,170,806	65,252.32
7	34	20,170,802	64,027.97
8	33	20,170,804	63,718.71
9	33	20,170,802	58,404.6
10	33	20,170,807	56,704.89
11	34	20,170,807	52,923.21
12	33	20,170,806	52,233.34
13	34	20,170,809	47,571.3
14	34	20,170,810	47,543.23
15	33	20,170,810	46,969.19
16	33	20,170,808	46,852.21
17	33	20,170,809	45,016.49
18	34	20,170,805	44,916.38
19	34	20,170,808	44,691.52
20	33	20,170,805	37,233.64
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章