1.拿出發(fā)標時間在2018年4月至2018年6月成交的新客客戶,區(qū)分M站與APP,計算他們的年齡倒淫,性別碉咆,1期30+逾期金額
2.計算發(fā)標前一個月內(nèi)抖韩,用戶午夜通話次數(shù)的占比
3.拿出題1中的用戶在發(fā)標前1周的平均通話間隔時長
(邏輯:半夜通話較多的用戶,除特殊工種以外疫铜,有可能近期遇到緊急事件茂浮,需要到處籌錢
發(fā)標前一周頻繁通話,是否是頻繁接到催收電話,或者急用錢到處打電話借錢)
~逾期表 yuqi
userid, listingid, creation_date, principal(本金), duedate_30 (30+逾期金額) , risk_category(發(fā)標類型)
~通話歷史表 tonghualishi
userid席揽,calltime(通話時間)顽馋,duration(通話時長秒)
~身份信息的表 shenfenxinxi
字段:userid, idnumber, reg_time(入表時間)
yuqi
shenfenxinxi
tonghualishi
創(chuàng)建表:
#逾期表 yuqi
create table yuqi (userid int,
listingid int,
creation_date date,
principal varchar(45),
duedate_30 varchar(45),
risk_category varchar(45))
insert into yuqi values (001,1001,'2018-04-01',2000,0,'非APP新客'),
(002,1002,'2018-05-01',4000,0,'APP新客')
#身份信息的表 shenfenxinxi
create table shenfenxinxi (userid int,
idnumber varchar(45),
reg_time date)
insert into shenfenxinxi values (001,'330621940801000','2017-01-01'),
(002,'33062119980101001x','2017-01-01')
#通話歷史表 tonghualishi
create table tonghualishi (userid int,
calltime datetime,
duration time,
inserttime datetime)
insert into tonghualishi values (001,'2018-03-26 02:00:30','00:00:30','2017-01-01 00:00:30'),
(001,'2018-03-25 06:00:00','00:00:40','2017-01-01 00:00:30'),
(001,'2018-03-29 04:00:00','00:01:20','2017-01-01 00:00:30'),
(002,'2018-04-10 04:00:00','00:00:20','2017-01-01 00:00:30'),
(002,'2018-04-30 12:00:10','00:00:50','2017-01-01 00:00:30')
解答:
1.拿出發(fā)標時間在2018年4月至2018年6月成交的新客客戶,區(qū)分M站與APP幌羞,計算他們的年齡寸谜,性別,1期30+逾期金額
#1,拿出發(fā)標時間在2018年4月至2018年6月的M站與APP的成交的新客客戶属桦,并且區(qū)分M站與APP
#此處不用status like '%成功%'的原因是熊痴,已經(jīng)取得是逾期表中得數(shù)據(jù),說明都已經(jīng)是有借款成功的
create table t0 as
select userid, listingid, creation_date, principal, duedate_30,
case when risk_category='非APP新客' then '新客閃電' else '新客APP' end as cj_type
from yuqi
where creation_date>='2018-04-01' and creation_date<'2018-07-01'
and risk_category in ('非APP新客','APP新客')
t0
#2,計算年齡聂宾,性別
create table t1 as
select userid, listingid, creation_date, principal, duedate_30, cj_type,
case when length(a.idnumber)=18 then ((date_format(creation_date,'%Y')-substr(idnumber,7,4))
-case when date_format(creation_date,'%m%d')<substr(idnumber,11,4) then 1 else 0 end)
else ((date_format(creation_date,'%Y')-concat('19',substr(idnumber,7,2)))
-case when date_format(creation_date,'%m%d')<substr(idnumber,9,4) then 1 else 0 end)
end as age,
case when length(a.idnumber)=18 and substring(a.idnumber,-2,1) in ('0','2','4','6','8') then 'female'
when length(a.idnumber)=18 and substring(a.idnumber,-2,1) not in ('0','2','4','6','8') then 'male'
when length(a.idnumber)=15 and substring(a.idnumber,-1,1) in ('0','2','4','6','8') then 'female'
when length(a.idnumber)=15 and substring(a.idnumber,-1,1) not in ('0','2','4','6','8') then 'male'
else '其他'
end as gender
from (select a.*, b.idnumber
from t0 a
inner join shenfenxinxi b on a.userid=b.userid) a
t1
2.計算發(fā)標前一個月內(nèi)果善,用戶午夜通話次數(shù)的占比
#1,拿出目標用戶
create table t2 as
select a.userid, a.listingid, a.creation_date, a.principal, a.duedate_30, a.cj_type, b.calltime
from t1 a
inner join tonghualishi b
on a.userid=b.userid
and datediff(a.creation_date,b.calltime)>0
and datediff(a.creation_date,b.calltime)<=30
and a.creation_date>=inserttime
t2
#2,計算總通話次數(shù)
create table t3 as
select userid, listingid, count(calltime) as zth_cs
from t2
group by userid, listingid
t3
#3,計算通話時間段在2:00至5:00的通話次數(shù)
create table t4 as
select userid, listingid, count(calltime) as wyth_cs
from t2
where substr(calltime,12,5) >='02:00' and substr(calltime,12,5) <='05:00'
group by userid,listingid
t4
#4,計算占比
create table t5 as
select a.userid, a.listingid, a.creation_date, a.principal, a.duedate_30, a.cj_type, (coalesce(c.wyth_cs,0)/b.zth_cs) as wy_zb
from t2 a
inner join t3 b
on a.listingid=b.listingid
left join t4 c
on a.listingid=c.listingid
where b.zth_cs>0
t5
3.拿出題1中的用戶在發(fā)標前1周的平均通話間隔時長
#1,拿出發(fā)標前1周的通話記錄
create table t6 as
select a.userid, a.listingid, a.creation_date, b.calltime, b.duration
from t1 a
inner join tonghualishi b
on a.userid=b.userid
and datediff(a.creation_date,b.calltime)>0
and datediff(a.creation_date,b.calltime)<=7
and a.creation_date>=b.inserttime
t6
#2,將用戶的通話記錄按照通話時間進行排序
create table t7 as
select a.*,
(select count(*) from t6 b where a.listingid=b.listingid and a.calltime>=b.calltime) as r
from t6 a
t7
#3,相鄰的兩個通話,上一個加上通話時間亏吝,然后進行相減岭埠,求出通話間隔;計算平均值
#內(nèi)嵌部分
select a.userid, a.listingid, a.calltime as t1, b.calltime as t2, a.duration
from t7 a
inner join t7 b
on a.listingid=b.listingid
and a.r = b.r-1
#求平均之前
select userid, listingid, unix_timestamp(t2)-(unix_timestamp(t1)+duration)
from (select a.userid, a.listingid, a.calltime as t1, b.calltime as t2, a.duration
from t7 a
inner join t7 b
on a.listingid=b.listingid
and a.r = b.r-1) a
#最終組合結(jié)果
create table t8 as
select userid, listingid, avg( unix_timestamp(t2)-(unix_timestamp(t1)+duration) ) as thjg_avg
from (select a.userid, a.listingid, a.calltime as t1, b.calltime as t2, a.duration
from t7 a
inner join t7 b
on a.listingid=b.listingid
and a.r = b.r-1) a
嵌套部分
求平均之前
t8