CREATE TABLE city(
province_code INT,
province_name string,
city_code INT,
city_name string
)
ROW FORMAT delimited
fields terminated by ','
lines terminated by '\n';
load數(shù)據(jù)到hive表: load data local inpath '/home/bigdata//hive/city.txt' into table city;
查看創(chuàng)建表 show create table city;
查看表信息 desc city;
修改表:alter table city rename to city_name;
復(fù)制表: create table city like city_code;
刪除庫 drop database **;
刪除表 drop table **;
查表內(nèi)容 select * from city limit 10;
顯示列名 set hive.cli.print.header=true;
建庫
建表
查詢相關(guān)命令? 最大的10個:select * from city order by city_code desc limit 10;
擁有最多市的省份:省里市的數(shù)目:取最多的10個矢萃臁:
select province_name,count(city_name) as cnt from city group by province_name order by cnt desc limit 10;
去重統(tǒng)計 有多少個食枘:distinct
1)select count(distinct province_name) from city;
兩重:2)select count(1)
from
(
select province_name from city group by province_name
) a;
查出只有一個市的省份:省有多少市 辟躏;市為1
1)
select
province_name, cnt
from
(
select province_name,count(1) as cnt
from city
group by province_name
)a
where cnt =1;
2) select province_name count(1) as cnt from city group by province_name having cnt = 1;
外表:
table前加關(guān)鍵字external
遠端要用的location'/user/hzw/city/city.txt'
create external table city_ex(
province_code int,
province_name string,
city_code int,
city_name string)
row format delimited
fields terminated by','
lines terminated by'\n'
location '/user/hzw/city/';
內(nèi)表:
create table city_in(
province_code int,
province_name string,
city_code int,
city_name string)
row format delimited
fields terminated by','
lines terminated by'\n'
location '/user/hzw/city/';
2屿附、使用分區(qū)patition對應(yīng)表中的一個目錄;
靜態(tài)分區(qū)需要在sql語句中指定;
CREATE TABLE `user`(
`uid` INT,
`city_code` INT,
`model` string,
`access` string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
load data local inpath '/home/bigdata/tanqi/hive/user.txt' into table user;
查詢user: select count(*) from user;
a.靜態(tài)分區(qū):
create table user_daily(
uid int,
city_code int,
model string,
access string
)
partitioned by (p_data string);
插入數(shù)據(jù)從user :
insert overwrite table user_daily partition (p_data='2017-09-01')
select * from user;
b.動態(tài)分區(qū)
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table user_daily partition (p_data)
select *,'2017-09-02' from user
union all
select *,'2017-09-04' from user;
ALTER TABLE user_daily PARTITION (p_date='2017-01-01') RENAME TO PARTITION (p_date='20170101');
3.作業(yè)
a) 統(tǒng)計WIFI環(huán)境下用戶最多的5個城市
Select city_code,count(*) as cnt from user where access='WIFI'
group by city_code order by cnt desc limit 5;
b) 統(tǒng)計用戶數(shù)>=3的access和city_code組合
select access,city_code,count(*) as cnt from user
group by access,city_code
having cnt >=3 order by cnt desc;
c) 機型中帶有ne(不區(qū)分大小寫)键菱,不同access及對應(yīng)用戶數(shù)
select access,count(*) as cnt from user
where lower(model) like '%ne%'
group by access order by cnt desc;
判斷 access里的名/總數(shù)的sum
select sum(if(access='WIFI',1,0))/count(1) from user;
select sum(if(access='2G',1,0))/count(1) from user;
select sum(if(access='4G',1,0))/count(1) from user;
if條件多
select
case
when uid % 10 in (0, 1, 2, 3) then '0-3'
when uid % 10 in (4, 5, 6, 7) then '4-7'
else '8-9'
end as interval,
count(*) as cnt
from user
group by
case
when uid % 10 in (0, 1, 2, 3) then '0-3'
when uid % 10 in (4, 5, 6, 7) then '4-7'
else '8-9'
end;
晚間
1、? list 集合
collect_set 去重的集合
collect_list 不去重
select collect_set(access) from user;
select collect_list(access) from user;
2又活、 hive 各種連接join
左連接 left outer join
右連接 right outer join
內(nèi)連接 inner join? 找出左右相同(AB相交)的記錄
全連接 full join
select user.uid,user.city_code,city.city_name
from
(select * from user where uid <=100) user
left join
(select * from city where province_code <=30)city
on (user.city_code = city.city_code)
limit 20;
select user.uid,user.city_code,city.city_name
from
(select * from user where uid <=100) user
full join
(select * from city where province_code <=30)city
on (user.city_code = city.city_code)
limit 20;
3、分組top N
分組 access
排序 序號 city_code
top1 序號=1
select access,city_code,uid
from
(
select uid, access,city_code,
row_number() over (partition by access order
by city_code desc)as row_num
from user
)a
where row_num = 1;
4岔激、累計? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 第一行? ? ? ? ? ? ? 當前行
select p_date,
sum(cnt) over(order by p_date asc rows between unbounded preceding and current row)
from
(
select p_date, count(*) as cnt
from user_daily
where p_date between '2017-09-01' and '2017-09-30'
group by p_date
)a
;