281.獲取定長連續(xù)子序列
create table savior (
id int , status int
);
insert into savior values (1,1);
insert into savior values (2,1);
insert into savior values (3,0);
insert into savior values (4,0);
insert into savior values (5,0);
insert into savior values (6,1);
insert into savior values (7,0);
insert into savior values (8,0);
insert into savior values (9,0);
insert into savior values (10,0);
insert into savior values (11,0);
insert into savior values (12,1);
insert into savior values (13,1);
insert into savior values (14,0);
insert into savior values (15,0);
要求:從 savior 表中獲取狀態(tài)為 0 的 id狈定,并且這些 id 能夠組成長度為 3 的連續(xù)子序列擂涛。
3~5
7~9
8~10
9~11
提供一種寫法,這種題目在上一篇中有類似的題目,發(fā)現(xiàn)最后一行是不符合題意的,怎么有效率過濾掉是難題....想不到確實不好求....
select
concat(id-1,'---',id+1)
from (
select id,
status,
max(status) over (order by id rows between 1 preceding and 1 following ) as max_number
from savior
) t1
where max_number =0 ;
方法二:使用自連接也可以求取,答案提供的是row_number獲取重復的便簽,使用自連接來獲取連續(xù)的值
WITH cte AS
(SELECT
*,
row_number() over (
ORDER BY id) AS rn
FROM
savior
WHERE STATUS = 0)
SELECT
CONCAT_WS('~', a.id, b.id) AS subseq
FROM
cte a
INNER JOIN cte b
ON a.id + 2 = b.id
AND a.rn + 2 = b.rn
方法三.使用偏移量函數(shù)來求取
學習一下max()等窗口函數(shù)嵌套if等,到底判斷的是什么???例如
select id,
status,
max(if(status = 1 , 5,status)) over (order by id rows between 1 preceding and 1 following ) as max_number
from savior
只要max中有一個數(shù)是滿足的那么結果就是5,!!!!!!
282.動態(tài)規(guī)劃
有100天的商品價格,按照買入賣出的順序,怎么操作能賺得最多的價值,以及是多少
create table dtgh (
data int
);
insert into dtgh values (1);
insert into dtgh values (3);
insert into dtgh values (4);
insert into dtgh values (10);
insert into dtgh values (9);
insert into dtgh values (7);
insert into dtgh values (12);
insert into dtgh values (1);
insert into dtgh values (2);
insert into dtgh values (1);
insert into dtgh values (1);
這是一道動態(tài)規(guī)劃題,使用SQL怎么寫
select
concat(min(data),',',max(data)) ,max(data) - min(data)
from (
select data,
sum(`lag`) over (order by rn ) as money
from (
select data,
if(data - num < 0, 1, 0) as `lag`,
row_number() over () as rn
from (
select data,
lag(data, 1, data + 1) over () as num
from dtgh
) t1
) t2
) t3
group by money
having count(1) > 1 and ( max(data) - min(data) > 0 );
283.層級查詢(一)
在工作中都遇到過存在層次關系的數(shù)據(jù)表祷杈,典型的例子諸如菜單表(多級菜單)嫌术、用戶表(擁有上下級關系)、商品類目表(多級類目)
empno ename mgr
------ ------ --------
7369 SMITH 7902
7499 ALLEN 7698
7521 WARD 7698
7566 JONES 7839
7654 MARTIN 7698
7698 BLAKE 7839
7782 CLARK 7839
7788 SCOTT 7566
7839 KING (NULL)
7844 TURNER 7698
7876 ADAMS 7788
7900 JAMES 7698
7902 FORD 7566
7934 MILLER 7782
其中氮块,mgr 為 NULL 表明該員工沒有上級領導吼野。
我們要把每個員工的所有上級領導都找出來校哎,實現(xiàn)的效果如下:
empno ename path
------ ------ ----------------------
7369 SMITH ->FORD->JONES->KING
7499 ALLEN ->BLAKE->KING
7521 WARD ->BLAKE->KING
7566 JONES ->KING
7654 MARTIN ->BLAKE->KING
7698 BLAKE ->KING
7782 CLARK ->KING
7788 SCOTT ->JONES->KING
7839 KING
7844 TURNER ->BLAKE->KING
7876 ADAMS ->SCOTT->JONES->KING
7900 JAMES ->BLAKE->KING
7902 FORD ->JONES->KING
7934 MILLER ->CLARK->KING
對于編號為 7369 的 SMITH,他的直屬領導的編號是 7902瞳步,姓名叫做 FORD闷哆;FORD 的直屬領導叫做 JONES,編號為 7566单起;編號為 7566 的直屬領導是編號為 7839 的 KING抱怔,而 KING 沒有直屬領導。因此嘀倒,SMITH 的上級領導的關系鏈構成:->FORD->JONES->KING
WITH RECURSIVE leader_path(empno, ename, mgr, path) AS
(SELECT
empno,
ename,
mgr,
CAST('' AS CHAR(100)) AS path
FROM
emp
UNION ALL
SELECT
a.empno,
a.ename,
b.mgr,
CONCAT(
a.path,
IFNULL(CONCAT('->', b.ename), '')
)
FROM
leader_path a
LEFT JOIN emp b
ON a.mgr = b.empno
WHERE b.empno IS NOT NULL)
SELECT
empno,
ename,
path
FROM
leader_path
WHERE mgr IS NULL
ORDER BY 1
在遞歸中一定要加入終止條件屈留,本 SQL 的終止條件是 WHERE b.empno IS NOT NULL局冰;
遇到字符串拼接需要提前設置該字段的長度,對應到 SQL 中的操作是 CAST('' AS CHAR(100))灌危;
遞歸會生成中間結果康二,我們要把中間結果過濾掉,WHERE mgr IS NULL 就是只獲取最終的結果乍狐。
284.層級查詢(二)
在mysql中實現(xiàn)層次查詢的兩種方式赠摇。層級查詢(一)舉的示例是獲取從葉子點到根節(jié)點的路徑,層級查詢(二)要實現(xiàn)的是從根節(jié)點找到所有葉子節(jié)點浅蚪。
WITH RECURSIVE leader_path (empno, ename, mgr, lv) AS
(SELECT
empno,
ename,
mgr,
1 AS lv
FROM
emp WHERE mgr IS NULL
UNION ALL
SELECT
b.empno,
b.ename,
b.mgr,
lv + 1
FROM
leader_path a
INNER JOIN emp b
ON a.empno = b.mgr )
SELECT
empno,
ename,
lv
FROM
leader_path
ORDER BY 1
結果如下:
empno ename lv
------ ------ --------
7369 SMITH 4
7499 ALLEN 3
7521 WARD 3
7566 JONES 2
7654 MARTIN 3
7698 BLAKE 2
7782 CLARK 2
7788 SCOTT 3
7839 KING 1
7844 TURNER 3
7876 ADAMS 4
7900 JAMES 3
7902 FORD 3
7934 MILLER 3
285.獲取一行中多個字段的最大值
id v1 v2 v3
------ ------ ------ --------
1 100 80 102
2 2 -20 -1
3 999 12 111
4 1234 2222 -123
5 871 888 666
6 -210 9 1024
7 0 -1 0
8 2 2 2
查詢結果
id v_max
------ --------
1 102
2 2
3 999
4 2222
5 888
6 1024
7 0
8 2
方法一:GREATEST()函數(shù)
SELECT
id,
GREATEST(v1, v2, v3) AS v_max
FROM
chaos
方法二:嵌套的 IF 語句
v12 = IF(v1 > v2, v1, v2)
v_max = IF(v12 > v3, v12, v3)
即:
--有點復雜.....
SELECT
id,
IF(
IF(v1 > v2, v1, v2) > v3,
IF(v1 > v2, v1, v2),
v3
) AS v_max
FROM
chaos
方法三:使用union all 扁平化
WITH chaos_union AS
(SELECT
id,
v1 AS v
FROM
chaos
UNION ALL
SELECT
id,
v2 AS v
FROM
chaos
UNION ALL
SELECT
id,
v3 AS v
FROM
chaos)
SELECT
id,
MAX(v) AS v_max
FROM
chaos_union
GROUP BY id
286.分位函數(shù)
hive中求中位數(shù),我一般是用正序和逆序來解決,這里提供hive中的分位函數(shù)來求取中位數(shù)
percentile:percentile(col, p) col是要計算的列(值必須為int類型)藕帜,p的取值為0-1,若為0.2惜傲,那么就是2分位數(shù)洽故,依次類推。
percentile_approx:percentile_approx(col, p)盗誊。列為數(shù)值類型都可以时甚。
percentile_approx還有一種形式percentile_approx(col, p,B)哈踱,參數(shù)B控制內(nèi)存消耗的近似精度荒适,B越大,結果的精度越高开镣。默認值為10000刀诬。當col字段中的distinct值的個數(shù)小于B時,結果就為準確的百分位數(shù)
287.打標簽(1)
下面兩題是攜程數(shù)倉的面試題
create table if not exists xiechen (
time_day date,product varchar(4) ,profit int
);
insert into xiechen values ('2021-01-01','A',2);
insert into xiechen values ('2021-01-02','A',3);
insert into xiechen values ('2021-01-03','A',4);
insert into xiechen values ('2021-01-04','A',5);
insert into xiechen values ('2021-01-01','B',2);
insert into xiechen values ('2021-01-02','B',4);
insert into xiechen values ('2021-01-03','B',3);
insert into xiechen values ('2021-01-04','B',4);
insert into xiechen values ('2021-01-01','B',5);
求每組收益連續(xù)增加3天以上的天數(shù)
select concat(min(time_day),'--',max(time_day)) , max(product)
from (
select time_day,
product,
profit,
sum(lag_rn) over (partition by product order by time_day) as rn
from (
select time_day,
product,
profit,
if(profit - lag(profit, 1, profit) over (partition by product) <= 0, 1, 0) as lag_rn
from xiechen) t1
) t2
group by product,rn
having count(rn) >= 3;
288.打標簽(2)
題目:在第一題的基礎上,求日期是連續(xù)的且收益是連續(xù)3天遞增的
解答:
在第一題的基礎上在group by 后的having后增加:having count(rn) >= 3 and datediff(max(time_day),min(time_day)) + 1 = count(rn);
只要datediff(max(time_day),min(time_day)) 間隔數(shù)等于總的行數(shù)就是答案
select concat(min(time_day),'--',max(time_day)) , max(product)
from (
select time_day,
product,
profit,
sum(lag_rn) over (partition by product order by time_day) as rn
from (
select time_day,
product,
profit,
if(profit - lag(profit, 1, profit) over (partition by product) <= 0, 1, 0) as lag_rn
from xiechen) t1
) t2
group by product,rn
having count(rn) >= 3 and datediff(max(time_day),min(time_day)) + 1 = count(rn);
289.魔力貓盒面試題
主糧,渴望,0.45
主糧,哈根紐翠斯,0.15
主糧,愛肯納,0.40
罐頭,儀親,0.05
罐頭,麥克勞德醫(yī)生,0.8
罐頭,happy100,0.15
零食,儀親,0.24
零食,益智選,0.33
零食,mikbone,0.20
零食,巔峰,0.23
要求:求取amount_percent從大到小邪财,累計和大于等于75%,的情況陕壹,且以大于等于75%為界限
select cat_name,brand_name,amount_percent from(
select
cat_name,brand_name,amount_percent,calculate_percent,
lag(calculate_percent,1,0) over(partition by cat_name order by calculate_percent) as lag_persent
from
(
select
cat_name,brand_name,amount_percent,
sum(amount_percent) over(partition by cat_name order by amount_percent desc) as calculate_percent
from group_precent_top_n) as a1
) as a2
where (calculate_percent >= 0.75 and lag_persent < 0.75) or
(calculate_percent < 0.75 and lag_persent < 0.75)
;
其實不必這么寫,這道題考查是溝通需求,過濾出特殊的數(shù)據(jù)行或者改寫需求
提供其他方法:
- 使用hive中的桶函數(shù)分20個桶
- 如上面的解答方法使用sum窗口來求
- 使用row_number() 和count() 窗口來求
- 使用自連接來求
290.簡單用戶畫像
有表sale:order_id 訂單id,唯一字段 user_id 用戶id product_id 訂單id create_time 訂單的創(chuàng)建時間 ,product_num 訂單數(shù)
有表user_info user_id 用戶id sex age
求:使用一條SQL生成完整的用戶畫像包含,用戶id sex age d7order_num d14_order_num.后面兩個字段分別為近七天的訂單數(shù),近14天的訂單數(shù)
使用sum(if)來寫
select t1.user_id ,
sex,
age,
sum(if(datediff('2021-3-21',create_time)<=7,product_num,0)) as d7order_num ,
sum(if(datediff('2021-3-21',create_time)<=14,product_num,0)) as d14order_num
from user_info t1 left join sale t2 on t1.user_id = t2.user_id
group by t1.user_id, sex, age;
使用join來寫
select t1.user_id,sex,age,t2.d14order_num,t3.d7order_num
from user_info t1 left join (
select user_id ,sum(product_num) as d14order_num
from sale
where datediff('2021-3-21',create_time)<=14
group by user_id
) t2 on t1.user_id = t2.user_id
left join (
select user_id ,sum(product_num) as d7order_num
from sale
where datediff('2021-3-21',create_time)<= 7
group by user_id
) t3 on t1.user_id = t3.user_id;