用戶留存計(jì)算
(1) 基于某日的次日留存率的計(jì)算
-- 思路:
-- select *
-- from 第一天 left join 第二天
-- on uid相同
select
a.dayno as 日期,
count(distinct a.uid) as 活躍用戶數(shù),
count(distinct b.uid) as 次日留存數(shù)
from (select * from act_user_info where app_name='相機(jī)'and dayno='2018-05-01')a
left join (select * from act_user_info where app_name='相機(jī)'and dayno='2018-05-02') b
on a.uid=b.uid
group by a.dayno
(2) 基于某日的次日留存率茅特,3日留存率忘分,7日留存率的計(jì)算
select
a.dayno as 日期,
count (distinct a.uid) as 活躍用戶數(shù),
count (distinct case when datediff(b.dayno,a.dayno)=1 then b.uid else null end) as 次日留存數(shù),
count (distinct case when datediff(b.dayno,a.dayno)=2 then b.uid else null end) as 3日留存數(shù),
count (distinct case when datediff(b.dayno,a.dayno)=6 then b.uid else null end) as 7日留存數(shù),
count (distinct case when datediff(b.dayno,a.dayno)=1 then b.uid else null end)/count (distinct a.uid) as 次日留存率,
count (distinct case when datediff(b.dayno,a.dayno)=2 then b.uid else null end)/count (distinct a.uid) as 3日留存率,
count (distinct case when datediff(b.dayno,a.dayno)=6 then b.uid else null end)/count (distinct a.uid) as 7日留存率
from (select * from act_user_info where app_name='相機(jī)'and dayno='2018-05-01')a
left join (select * from act_user_info where app_name='相機(jī)') b
on a.uid=b.uid
group by a.dayno
(3) 基于所有日的次日留存率棋枕,3日留存率白修,7日留存率的計(jì)算
#將上面的a分表代碼改為如下代碼即可
from (select * from act_user_info where app_name='相機(jī)')a
(4)用python實(shí)現(xiàn):
# 計(jì)算n日留存率
def cal_retention(n):
# 用于記錄出現(xiàn)過的user_id
user_list=[]
# 取最后一天的前N天
cal_date=pd.Series(df['date'].unique()).sort_values()[:-n]
# 用于存儲最后留存率結(jié)果
retention_rates=[]
for to_date in cal_date:
# 通過與已經(jīng)有記錄的用戶列表的集合對比,識別新用戶
new_user_list=set(df[df['date']==to_date]['user_id'])-set(user_list)
# 用于存儲最后留存率結(jié)果
user_list.extend(new_user_list)
# 第n天留存情況
user_ndate=df[df['date']==to_date+timedelta(n)]['user_id'].unique()
retention_cnt=0
for user_id in user_ndate:
if user_id in new_user_list:
retention_cnt+=1
retention_rate=retention_cnt/len(new_user_list)
# 匯總N日留存數(shù)據(jù)
retention_rates.append(retention_rate)
u_retention=pd.Series(retention_rates,index=cal_date)
return u_retention