1. 題目要求
現(xiàn)有hive表temp胆数,其中只有一個字段(temp_record string)完慧,每一行代表某一天的氣溫刻蚯,比如绊含,2014010114代表,2014年1月1日的氣溫為14度炊汹,表中數(shù)據(jù)如下:
hive> select * from temp;
2014010114
2014010216
2014010317
2014010410
2014010506
2012010609
2012010732
2012010812
2012010919
2012011023
2001010116
2001010212
2001010310
2001010411
2001010529
2013010619
2013010722
2013010812
2013010929
2013011023
2008010105
2008010216
2008010337
2008010414
2008010516
2007010619
2007010712
2007010812
2007010999
2007011023
2010010114
2010010216
2010010317
2010010410
2010010506
2015010649
2015010722
2015010812
2015010999
2015011023
要求:用hive求出每一年的最大氣溫那條記錄
注意:數(shù)據(jù)格式不能改變躬充,例如求出來2015年的最大氣溫那條記錄為2015010999
2. 參考答案
--(1)
create table temp_column as
SELECT substring(temp_record, 1, 4) year, substring(temp_record, 5, 4) day, substring(temp_record, 9) temp
FROM temp;
/* temp_column表中數(shù)據(jù):
2014 0101 14
2014 0102 16
2014 0103 17
2014 0104 10
......
*/
--(2)
create table temp_index as
select year, day, temp,
row_number() over(distribute by year sort by temp desc) as index
from temp_column;
/* temp_index表中數(shù)據(jù):
2001 0105 29 1
2001 0101 16 2
2001 0102 12 3
2001 0104 11 4
2001 0103 10 5
......
*/
--(3)
select concat(year, day, temp) from temp_index where index = 1;
/* 最終結(jié)果:
2001010529
2007010999
2008010337
2010010317
2012010732
2013010929
2014010317
2015010999
*/