數(shù)據(jù)準(zhǔn)備
CREATE EXTERNAL TABLE lxw1234 (
cookieid string,
createtime string, --day
pv INT
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
stored as textfile location '/tmp/lxw11/';
DESC lxw1234;
cookieid STRING
createtime STRING
pv INT
hive> select * from lxw1234;
OK
cookie1 2015-04-10 1
cookie1 2015-04-11 5
cookie1 2015-04-12 7
cookie1 2015-04-13 3
cookie1 2015-04-14 2
cookie1 2015-04-15 4
cookie1 2015-04-16 4
SUM
— 注意,結(jié)果和ORDER BY
相關(guān),默認(rèn)為升序
SELECT cookieid,
createtime,
pv,
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認(rèn)為從起點(diǎn)到當(dāng)前行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點(diǎn)到當(dāng)前行寝并,結(jié)果同pv1
SUM(pv) OVER(PARTITION BY cookieid) AS pv3, --分組內(nèi)所有行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4, --當(dāng)前行+往前3行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5, --當(dāng)前行+往前3行+往后1行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6 ---當(dāng)前行+往后所有行
FROM lxw1234;
cookieid createtime pv pv1 pv2 pv3 pv4 pv5 pv6
-----------------------------------------------------------------------------
cookie1 2015-04-10 1 1 1 26 1 6 26
cookie1 2015-04-11 5 6 6 26 6 13 25
cookie1 2015-04-12 7 13 13 26 13 16 20
cookie1 2015-04-13 3 16 16 26 16 18 13
cookie1 2015-04-14 2 18 18 26 17 21 10
cookie1 2015-04-15 4 22 22 26 16 20 8
cookie1 2015-04-16 4 26 26 26 13 13 4
pv1: 分組內(nèi)從起點(diǎn)到當(dāng)前行的pv累積,如腹备,11號(hào)的pv1=10號(hào)的pv+11號(hào)的pv, 12號(hào)=10號(hào)+11號(hào)+12號(hào)
pv2: 同pv1
pv3: 分組內(nèi)(cookie1)所有的pv累加
pv4: 分組內(nèi)當(dāng)前行+往前3行衬潦,如,11號(hào)=10號(hào)+11號(hào)植酥, 12號(hào)=10號(hào)+11號(hào)+12號(hào)镀岛, 13號(hào)=10號(hào)+11號(hào)+12號(hào)+13號(hào), 14號(hào)=11號(hào)+12號(hào)+13號(hào)+14號(hào)
pv5: 分組內(nèi)當(dāng)前行+往前3行+往后1行友驮,如漂羊,14號(hào)=11號(hào)+12號(hào)+13號(hào)+14號(hào)+15號(hào)=5+7+3+2+4=21
pv6: 分組內(nèi)當(dāng)前行+往后所有行,如卸留,13號(hào)=13號(hào)+14號(hào)+15號(hào)+16號(hào)=3+2+4+4=13拨与,14號(hào)=14號(hào)+15號(hào)+16號(hào)=2+4+4=10
如果不指定ROWS BETWEEN
,默認(rèn)為從起點(diǎn)到當(dāng)前行;
如果不指定ORDER BY
,則將分組內(nèi)所有值累加;
關(guān)鍵是理解ROWS BETWEEN
含義,也叫做WINDOW
子句:
PRECEDING
:往前
FOLLOWING
:往后
CURRENT ROW
:當(dāng)前行
UNBOUNDED
:起點(diǎn)艾猜,UNBOUNDED PRECEDING
表示從前面的起點(diǎn), UNBOUNDED FOLLOWING
表示到后面的終點(diǎn)
AVG
捻悯,MIN
匆赃,MAX
,和SUM
用法一樣
-- AVG
SELECT cookieid,
createtime,
pv,
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認(rèn)為從起點(diǎn)到當(dāng)前行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點(diǎn)到當(dāng)前行今缚,結(jié)果同pv1
AVG(pv) OVER(PARTITION BY cookieid) AS pv3, --分組內(nèi)所有行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4, --當(dāng)前行+往前3行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5, --當(dāng)前行+往前3行+往后1行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6 ---當(dāng)前行+往后所有行
FROM lxw1234;
cookieid createtime pv pv1 pv2 pv3 pv4 pv5 pv6
-----------------------------------------------------------------------------
cookie1 2015-04-10 1 1.0 1.0 3.7142857142857144 1.0 3.0 3.7142857142857144
cookie1 2015-04-11 5 3.0 3.0 3.7142857142857144 3.0 4.333333333333333 4.166666666666667
cookie1 2015-04-12 7 4.333333333333333 4.333333333333333 3.7142857142857144 4.333333333333333 4.0 4.0
cookie1 2015-04-13 3 4.0 4.0 3.7142857142857144 4.0 3.6 3.25
cookie1 2015-04-14 2 3.6 3.6 3.7142857142857144 4.25 4.2 3.3333333333333335
cookie1 2015-04-15 4 3.6666666666666665 3.6666666666666665 3.7142857142857144 4.0 4.0 4.0
cookie1 2015-04-16 4 3.7142857142857144 3.7142857142857144 3.7142857142857144 3.25 3.25 4.0
-- MIN
SELECT cookieid,
createtime,
pv,
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認(rèn)為從起點(diǎn)到當(dāng)前行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點(diǎn)到當(dāng)前行算柳,結(jié)果同pv1
MIN(pv) OVER(PARTITION BY cookieid) AS pv3, --分組內(nèi)所有行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4, --當(dāng)前行+往前3行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5, --當(dāng)前行+往前3行+往后1行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6 ---當(dāng)前行+往后所有行
FROM lxw1234;
cookieid createtime pv pv1 pv2 pv3 pv4 pv5 pv6
-----------------------------------------------------------------------------
cookie1 2015-04-10 1 1 1 1 1 1 1
cookie1 2015-04-11 5 1 1 1 1 1 2
cookie1 2015-04-12 7 1 1 1 1 1 2
cookie1 2015-04-13 3 1 1 1 1 1 2
cookie1 2015-04-14 2 1 1 1 2 2 2
cookie1 2015-04-15 4 1 1 1 2 2 4
cookie1 2015-04-16 4 1 1 1 2 2 4
-- MAX
SELECT cookieid,
createtime,
pv,
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認(rèn)為從起點(diǎn)到當(dāng)前行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點(diǎn)到當(dāng)前行,結(jié)果同pv1
MAX(pv) OVER(PARTITION BY cookieid) AS pv3, --分組內(nèi)所有行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4, --當(dāng)前行+往前3行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5, --當(dāng)前行+往前3行+往后1行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6 ---當(dāng)前行+往后所有行
FROM lxw1234;
cookieid createtime pv pv1 pv2 pv3 pv4 pv5 pv6
-----------------------------------------------------------------------------
cookie1 2015-04-10 1 1 1 7 1 5 7
cookie1 2015-04-11 5 5 5 7 5 7 7
cookie1 2015-04-12 7 7 7 7 7 7 7
cookie1 2015-04-13 3 7 7 7 7 7 4
cookie1 2015-04-14 2 7 7 7 7 7 4
cookie1 2015-04-15 4 7 7 7 7 7 4
cookie1 2015-04-16 4 7 7 7 4 4 4