第十二單元 常用函數(shù)轻猖、窗口函數(shù)(開(kāi)窗函數(shù))
1爆班、常用函數(shù)
1.1 字符串
--字符串拼接函數(shù) concat
select concat('abc','def');
select concat_ws('-','abc','def');
--求字符串長(zhǎng)度 length
select length('jsdfijsdkfjkdsfjkdf');
1.2 日期
--日期函數(shù) to_date
select to_date('2019-09-11 16:55:11');
--把字符串轉(zhuǎn)換成unix時(shí)間戳
select unix_timestamp('2019-09-11 11:55:11','yyyy-MM-dd HH:mm:ss');
--得到現(xiàn)在的時(shí)間戳
select unix_timestamp();
--把unix時(shí)間戳轉(zhuǎn)換成字符串
select from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss');
1.3 數(shù)學(xué)運(yùn)算函數(shù)
--四舍五入
select round(5.4)
--四舍五入保留2位小數(shù)
select round(5.1345,2)
--向上取整
select ceil(5.3)
--向下取整
select floor(5.3)
--取絕對(duì)值
select abs(-5.2)
--取最大值
select greatest(3,4,5,6,7)
select max
--取最小值
select least(3,4,5,6,7)
select min
--隨機(jī)數(shù)
rand();
2、窗口函數(shù)(開(kāi)窗函數(shù))
在開(kāi)窗函數(shù)出現(xiàn)之前存在著很多用 SQL 語(yǔ)句很難解決的問(wèn)題,很多都要通過(guò)復(fù)雜的相關(guān)子查詢(xún)或者存儲(chǔ)過(guò)程來(lái)完成。為了解決這些問(wèn)題仓技,在2003年ISO SQL標(biāo)準(zhǔn)加入了開(kāi)窗函數(shù),開(kāi)窗函數(shù)的使用使得這些經(jīng)典的難題可以被輕松的解決俗他。目前在 MSSQLServer脖捻、Oracle、DB2 等主流數(shù)據(jù)庫(kù)中都提供了對(duì)開(kāi)窗函數(shù)的支持兆衅,不過(guò)非常遺憾的是 MYSQL 暫時(shí)還未對(duì)開(kāi)窗函數(shù)給予支持地沮。
與聚合函數(shù)一樣嗜浮,開(kāi)窗函數(shù)也是對(duì)行集組進(jìn)行聚合計(jì)算,但是它不像普通聚合函數(shù)那樣每組只返回一個(gè)值摩疑,開(kāi)窗函數(shù)可以為每組返回多個(gè)值危融,因?yàn)殚_(kāi)窗函數(shù)所執(zhí)行聚合計(jì)算的行集組是窗口。
2.1 窗口---排序
1,18,a,male
2,19,b,male
3,19,c,male
4,22,d,female
5,22,e,female
6,16,f,female
7,30,g,male
8,26,h,female
建表:
create table t_student(id int,age int,name string,sex string)
row format delimited fields terminated by ',';
導(dǎo)入數(shù)據(jù):
load data local inpath '/root/student.dat' into table t_student;
row_number() over:
-- select * from t_student group by sex;能否執(zhí)行未荒?专挪?
-- select sex,max(age) from t_student group by sex; 能否執(zhí)行及志?片排?
-- select id,age,name,sex,max(age) from t_student group by sex; 能否執(zhí)行?速侈?
-- 常規(guī)group by 只能查聚合的字段或使用聚合函數(shù)率寡,若使聚合后返回多個(gè)值則需要更復(fù)雜的操作
select
id,age,name,sex,row_number() over(partition by sex order by age desc) rk
from t_student;
+-----+------+-------+---------+-----+--+
| id | age | name | sex | rk |
+-----+------+-------+---------+-----+--+
| 8 | 26 | h | female | 1 |
| 5 | 22 | e | female | 2 |
| 4 | 22 | d | female | 3 |
| 6 | 16 | f | female | 4 |
| 7 | 30 | g | male | 1 |
| 3 | 19 | c | male | 2 |
| 2 | 19 | b | male | 3 |
| 1 | 18 | a | male | 4 |
+-----+------+-------+---------+-----+--+
分組求Top1、TopN:
select tmp.* from
(select
id,age,name,sex,row_number() over(partition by sex order by age desc) rk
from t_student) tmp
where tmp.rk = 1;
select tmp.* from
(select
id,age,name,sex,row_number() over(partition by sex order by age desc) rk
from t_student) tmp
where tmp.rk <=3;
rank() over倚搬、dense_rank() over冶共、ntile(n) over
select
id,age,name,sex,rank() over(partition by sex order by age desc) rk
from t_student;
+-----+------+-------+---------+-----+--+
| id | age | name | sex | rk |
+-----+------+-------+---------+-----+--+
| 8 | 26 | h | female | 1 |
| 5 | 22 | e | female | 2 |
| 4 | 22 | d | female | 2 |
| 6 | 16 | f | female | 4 |
| 7 | 30 | g | male | 1 |
| 3 | 19 | c | male | 2 |
| 2 | 19 | b | male | 2 |
| 1 | 18 | a | male | 4 |
+-----+------+-------+---------+-----+--+
select
id,age,name,sex,dense_rank() over(partition by sex order by age desc) rk
from t_student;
+-----+------+-------+---------+-----+--+
| id | age | name | sex | rk |
+-----+------+-------+---------+-----+--+
| 8 | 26 | h | female | 1 |
| 5 | 22 | e | female | 2 |
| 4 | 22 | d | female | 2 |
| 6 | 16 | f | female | 3 |
| 7 | 30 | g | male | 1 |
| 3 | 19 | c | male | 2 |
| 2 | 19 | b | male | 2 |
| 1 | 18 | a | male | 3 |
+-----+------+-------+---------+-----+--+
select
id,age,name,sex,ntile(3) over(partition by sex order by age desc) rk
from
t_student;
+-----+------+-------+---------+-----+--+
| id | age | name | sex | rk |
+-----+------+-------+---------+-----+--+
| 8 | 26 | h | female | 1 |
| 5 | 22 | e | female | 1 |
| 4 | 22 | d | female | 2 |
| 6 | 16 | f | female | 2 |
| 7 | 30 | g | male | 1 |
| 3 | 19 | c | male | 1 |
| 2 | 19 | b | male | 2 |
| 1 | 18 | a | male | 2 |
+-----+------+-------+---------+-----+--+
2.2 窗口---求和
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
C,2015-01,10
C,2015-01,20
A,2015-02,4
A,2015-02,6
C,2015-02,30
C,2015-02,10
B,2015-02,10
B,2015-02,5
A,2015-03,14
A,2015-03,6
B,2015-03,20
B,2015-03,25
C,2015-03,10
C,2015-03,20
建表:
create table t_saller(name string,month string,amount int)
row format delimited fields terminated by ',';
導(dǎo)入數(shù)據(jù):
load data local inpath '/root/saller.dat' overwrite into table t_saller;
求每個(gè)銷(xiāo)售員每個(gè)月的銷(xiāo)售額和到當(dāng)月為止的累計(jì)銷(xiāo)售額:
-- 求每個(gè)銷(xiāo)售員每月銷(xiāo)售額,先按照銷(xiāo)售員分組,再按月份分組
select name,month,sum(amount) samount from t_saller group by name,month;
+-------+----------+----------+--+
| name | month | samount |
+-------+----------+----------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| A | 2015-03 | 20 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
| B | 2015-03 | 45 |
| C | 2015-01 | 30 |
| C | 2015-02 | 40 |
| C | 2015-03 | 30 |
+-------+----------+----------+--+
-- 為避免過(guò)多子查詢(xún)每界,課提前創(chuàng)建 月銷(xiāo)售額表:
create table t_accumulate
as
select name,month,sum(amount) samount from t_saller group by name,month;
-- 累加 從最前面一行到當(dāng)前行 between unbounded preceding and current row
-- preceding 往前
-- unbounded 起點(diǎn)
-- current row 當(dāng)前行
-- unbounded preceding 最前一行
-- unbounded following 最后一行
-- following 窗口長(zhǎng)度
select
name,month,samount,
sum(samount) over(partition by name order by month rows between unbounded preceding and current row) accumlateAmount
from
t_accumulate;
+-------+----------+----------+------------------+--+
| name | month | samount | accumlateamount |
+-------+----------+----------+------------------+--+
| A | 2015-01 | 33 | 33 |
| A | 2015-02 | 10 | 43 |
| A | 2015-03 | 20 | 63 |
| B | 2015-01 | 30 | 30 |
| B | 2015-02 | 15 | 45 |
| B | 2015-03 | 45 | 90 |
| C | 2015-01 | 30 | 30 |
| C | 2015-02 | 40 | 70 |
| C | 2015-03 | 30 | 100 |
+-------+----------+----------+------------------+--+
--其他 between 2 preceding and 1 following
select
name,month,samount,sum(samount) over(partition by name order by month rows between 2 preceding and 1 following ) accumlateAmount
from
t_accumulate;
2.3 窗口---其他
min() over() ,max() over() , avg() over()
3捅僵、explode()
名詞解釋?zhuān)篹xplode 爆炸
1,zhangsan,化學(xué):物理:數(shù)學(xué):語(yǔ)文
2,lisi,化學(xué):數(shù)學(xué):生物:生理:衛(wèi)生
3,wangwu,化學(xué):語(yǔ)文:英語(yǔ):體育:生物
建表:
create table t_stu_subject(id int,name string,subjects array<string>)
row format delimited fields terminated by ','
collection items terminated by ':';
導(dǎo)入數(shù)據(jù):
load data local inpath '/root/stu_subject.dat' into table t_stu_subject;
需求:從學(xué)生學(xué)習(xí)課程分析這個(gè)學(xué)校都有哪些課程
-- 炸科目:explode(subjects)
select explode(subjects) from t_stu_subject;
-- 去重
select distinct tmp.subs from (select explode(subjects) subs from t_stu_subject) tmp;
lateral view 連接函數(shù)
select id,name,sub from
t_stu_subject
lateral view explode(subjects) tmp as sub;
+-----+-----------+------+--+
| id | name | sub |
+-----+-----------+------+--+
| 1 | zhangsan | 化學(xué) |
| 1 | zhangsan | 物理 |
| 1 | zhangsan | 數(shù)學(xué) |
| 1 | zhangsan | 語(yǔ)文 |
| 2 | lisi | 化學(xué) |
| 2 | lisi | 數(shù)學(xué) |
| 2 | lisi | 生物 |
| 2 | lisi | 生理 |
| 2 | lisi | 衛(wèi)生 |
| 3 | wangwu | 化學(xué) |
| 3 | wangwu | 語(yǔ)文 |
| 3 | wangwu | 英語(yǔ) |
| 3 | wangwu | 體育 |
| 3 | wangwu | 生物 |
+-----+-----------+------+--+
-- 單詞統(tǒng)計(jì) wordcount
-- split()
select
tmp.word,count(1) cnts from
(select explode(split(line,' ')) word from words) tmp
group by tmp.word order by cnts desc;
-- 炸map
select id,name,key,value from t_people
lateral view explode(family) tmp as key,value;
第十三單元 Hive 自定義函數(shù),Hive API眨层、Hive腳本
1月11號(hào)
第十四單元 其他
1月12號(hào)