---------開窗函數(shù)(窗口函數(shù))-分組排序開窗函數(shù)------------------------------
-- 序號函數(shù):row_number桂对,rank蟆技,dense_rank
-- 1.1準(zhǔn)備數(shù)據(jù)test1.txt
/*
cookie1,2018-04-10,1
cookie1,2018-04-11,5
cookie1,2018-04-12,7
cookie1,2018-04-13,3
cookie1,2018-04-14,2
cookie1,2018-04-15,4
cookie1,2018-04-16,4
cookie2,2018-04-10,2
cookie2,2018-04-11,3
cookie2,2018-04-12,5
cookie2,2018-04-13,6
cookie2,2018-04-14,3
cookie2,2018-04-15,9
cookie2,2018-04-16,7
*/
create table test_ordinal_func
(
cookie_id string comment 'cookie id',
create_time string comment '創(chuàng)建時(shí)間',
pv int comment '訪問次數(shù)'
) row format delimited fields terminated by ',';
load data local inpath '/export/data/test_window.txt' into table test_ordinal_func;
select *
from test_ordinal_func;
-- 使用開窗函數(shù)來實(shí)現(xiàn)分組并組內(nèi)排序
-- 需求1:按照cookie_id進(jìn)行分組,并且在每一組中按照pv進(jìn)行降序排序
-- partition by:分組
select *,
-- 1 2 3 4 5
row_number() over (partition by cookie_id order by pv desc ) row_number,
-- 1 2 3 3 5
rank() over (partition by cookie_id order by pv desc ) rank,
-- 1 2 3 3 4
dense_rank() over (partition by cookie_id order by pv desc ) dense_rank
from test_ordinal_func;
-- 需求:按照cookie_id進(jìn)行分組身腻,并且在每一組中按照pv進(jìn)行降序排序,選出每一組排名前三的信息(TopN問題)
-- 以下寫法報(bào)錯(cuò)部凑,因?yàn)镾QL的執(zhí)行順序:from where select蹭睡,where的時(shí)候,別名還沒有出現(xiàn)
/*
select *,
dense_rank() over (partition by cookie_id order by pv desc ) as rk3 -- 1 2 3 3 4
from test_ordinal_func
where rk3 <= 3;
*/
-- 通過子查詢解決
select *
from (
select *,
dense_rank() over (partition by cookie_id order by pv desc ) as rk3 -- 1 2 3 3 4
from test_ordinal_func
) as dense_rank
where dense_rank.rk3 <= 3;
-- 需求:查詢新冠疫情數(shù)據(jù)表中每一個(gè)州確診人數(shù)最多的縣TopN
select *
from (
select *,
-- partition by:后面可跟多個(gè)字段(同州同縣分一組)
-- order by:后面可跟多個(gè)字段(主要條件相同比較次要條件。按確診病例降序懦胞,確證病例相同則按死亡病例降序)
dense_rank() over (partition by state, county order by cases desc, deaths desc) as rk
from covid2
) as dense_rank
where dense_rank.rk = 1;
-- 分組和排序的后邊都可以指定多個(gè)字段
/*
select *,
dense_rank() over (partition by cookie_id,pv order by cookie_id,create_time desc ) as rk3 -- 1 2 3 3 4
from test_ordinal_func;
*/
---------開窗函數(shù)(窗口函數(shù))-指定區(qū)間進(jìn)行統(tǒng)計(jì)開窗函數(shù)(聚合開窗函數(shù))------------------------------
-- 3替久、使用聚合開窗函數(shù)進(jìn)行統(tǒng)計(jì)
-- 累加區(qū)間:從第1行累加到當(dāng)前行(默認(rèn))
select cookie_id,
create_time,
pv,
sum(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;
-- 等價(jià)上邊的寫法
select cookie_id,
create_time,
pv,
sum(pv) over (partition by cookie_id order by create_time
rows between unbounded preceding and current row) as pv2
from test_ordinal_func;
-- 累加區(qū)間:從前3行累加到當(dāng)前行
select cookie_id,
create_time,
pv,
sum(pv) over (partition by cookie_id order by create_time
rows between 3 preceding and current row)
from test_ordinal_func;
-- 累加區(qū)間:從前3行累加到下1行
select cookie_id,
create_time,
pv,
sum(pv) over (partition by cookie_id order by create_time
rows between 3 preceding and 1 following) as pv5
from test_ordinal_func;
-- 累加區(qū)間:從當(dāng)前行加到組的最后
select cookie_id,
create_time,
pv,
sum(pv) over (partition by cookie_id order by create_time
rows between current row and unbounded following) as pv6
from test_ordinal_func;
-- 以上的sum可以替換為avg,max躏尉,min
select cookie_id,
create_time,
pv,
max(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;
select cookie_id,
create_time,
pv,
min(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;
select cookie_id,
create_time,
pv,
avg(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;
-- 前后函數(shù):lag lead
-- lag
-- 將上1行數(shù)據(jù)放在當(dāng)前行
select cookie_id,
create_time,
pv,
lag(create_time, 1) over (partition by cookie_id order by create_time)
from test_ordinal_func;
-- 將上2行數(shù)據(jù)放在當(dāng)前行
select cookie_id,
create_time,
pv,
lag(create_time, 2) over (partition by cookie_id order by create_time)
from test_ordinal_func;
-- lead
-- 將下1行數(shù)據(jù)放在當(dāng)前行
select cookie_id,
create_time,
pv,
lead(create_time, 1) over (partition by cookie_id order by create_time)
from test_ordinal_func;
-- 將下2數(shù)據(jù)放在當(dāng)前行
select cookie_id,
create_time,
pv,
lead(create_time, 2) over (partition by cookie_id order by create_time)
from test_ordinal_func;
2022-03-07 開窗函數(shù)
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
- 文/潘曉璐 我一進(jìn)店門碌秸,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人悄窃,你說我怎么就攤上這事讥电。” “怎么了轧抗?”我有些...
- 文/不壞的土叔 我叫張陵恩敌,是天一觀的道長。 經(jīng)常有香客問我横媚,道長纠炮,這世上最難降的妖魔是什么? 我笑而不...
- 正文 為了忘掉前任分唾,我火速辦了婚禮抗碰,結(jié)果婚禮上,老公的妹妹穿的比我還像新娘绽乔。我一直安慰自己弧蝇,他們只是感情好,可當(dāng)我...
- 文/花漫 我一把揭開白布折砸。 她就那樣靜靜地躺著看疗,像睡著了一般。 火紅的嫁衣襯著肌膚如雪睦授。 梳的紋絲不亂的頭發(fā)上两芳,一...
- 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼特咆!你這毒婦竟也來了季惩?” 一聲冷哼從身側(cè)響起,我...
- 序言:老撾萬榮一對情侶失蹤腻格,失蹤者是張志新(化名)和其女友劉穎画拾,沒想到半個(gè)月后,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體菜职,經(jīng)...
- 正文 獨(dú)居荒郊野嶺守林人離奇死亡青抛,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
- 正文 我和宋清朗相戀三年,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了些楣。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片脂凶。...
- 正文 年R本政府宣布,位于F島的核電站罪帖,受9級特大地震影響促煮,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜整袁,卻給世界環(huán)境...
- 文/蒙蒙 一冀泻、第九天 我趴在偏房一處隱蔽的房頂上張望赖欣。 院中可真熱鬧,春花似錦、人聲如沸椿胯。這莊子的主人今日做“春日...
- 文/蒼蘭香墨 我抬頭看了看天上的太陽戳稽。三九已至,卻和暖如春痹仙,著一層夾襖步出監(jiān)牢的瞬間是尔,已是汗流浹背。 一陣腳步聲響...
- 正文 我出身青樓,卻偏偏與公主長得像恩溅,于是被迫代替她去往敵國和親痕囱。 傳聞我的和親對象是個(gè)殘疾皇子,可洞房花燭夜當(dāng)晚...
推薦閱讀更多精彩內(nèi)容
- 在使用數(shù)據(jù)庫/數(shù)據(jù)倉庫的過程中暴匠,我們可能會(huì)遇到以下幾個(gè)情況: 需要得到的數(shù)據(jù)既要包括聚合后的數(shù)據(jù)鞍恢, 也需要包括沒被...
- Spark 1.4.x版本以后,為Spark SQL和DataFrame引入了開窗函數(shù)每窖,比如最經(jīng)典帮掉,最常用的,ro...
- 1:了解substr函數(shù)(截取函數(shù))窒典,從start位置開始提取字符串蟆炊,示例如下 2:用substr函數(shù)在TCGA數(shù)...