所需數(shù)據(jù):ORDER_INFO_UTF.CSV别伏、USER_INFO_UTF.CSV
數(shù)據(jù)鏈接:https://pan.baidu.com/s/11ZtjKnv5-nwyf6cMyk_3JQ
提取碼:yu63
目錄
1.數(shù)據(jù)的導(dǎo)入
1.1創(chuàng)建表結(jié)構(gòu)
1.1.1創(chuàng)建訂單表
2.對(duì)數(shù)據(jù)進(jìn)行分析
2.1? 統(tǒng)計(jì)每個(gè)月的下單人數(shù)
2.2? 統(tǒng)計(jì)復(fù)購率和回購率
2.3? 統(tǒng)計(jì)消費(fèi)者性別與消費(fèi)頻次的關(guān)系
2.4 ? 3吐根、4月份每日下單人數(shù),每日下單數(shù)
2.5? 統(tǒng)計(jì)消費(fèi)者性別與平均消費(fèi)金額的關(guān)系
2.6? 統(tǒng)計(jì)不同年齡段的消費(fèi)金額的占比
2.7? 統(tǒng)計(jì)每個(gè)時(shí)間段的下單人數(shù)
2.8? 統(tǒng)計(jì)消費(fèi)的二八法則,消費(fèi)的top20%用戶消費(fèi)額占總消費(fèi)額的占比
1.導(dǎo)入csv數(shù)據(jù)
1.1創(chuàng)建表結(jié)構(gòu)
1.1.1創(chuàng)建訂單表
CREATE TABLE ORDERINFO
? ? (ORDERID VARCHAR(10) NULL,#訂單ID递惋,主鍵
? ? USERID VARCHAR(10) NULL,#用戶ID柔滔,可以和用戶表進(jìn)行關(guān)聯(lián)
? ? ISPAID VARCHAR(10) NULL,#是否支付
? ? PRINCE VARCHAR(10) NULL,#訂單價(jià)格
? ? PAIDTIME DATETIME? NULL #訂單支付時(shí)間)
1.1.2創(chuàng)建用戶信息表
CREATE TABLE USER_INFO_ULF
? ? (USERID varchar(10) NULL,#用戶ID,主鍵
? ? SEX varchar(10) NULL,#性別
? ? BIRTH DATE NULL #出生日期)
1.1.3為表創(chuàng)建索引萍虽,提高查詢速度
#為user_info_utf創(chuàng)建主鍵索引
ALTER TABLE `user_info_utf` MODIFY COLUMN userid VARCHAR(10) PRIMARY KEY睛廊;
#為表orderinfo創(chuàng)建主鍵索引
ALTER TABLE `orderinfo` MODIFY COLUMN `ORDERID` VARCHAR(10) PRIMARY KEY;
#為表orderinfo創(chuàng)建外鍵索引
ALTER TABLE orderinfo ADD FOREIGN KEY(userid) REFERENCES user_info_utf(userid)
1.2導(dǎo)入csv數(shù)據(jù)
電腦安裝了sqlyog杉编,導(dǎo)入使用本地加載的csv數(shù)據(jù)超全,速度很快咆霜,一兩秒就導(dǎo)入成功了
2.數(shù)據(jù)處理
#不對(duì)未支付訂單進(jìn)行分析,為提高查詢效率嘶朱,刪除未支付訂單
DELETE
FROM `orderinfo`
WHERE ispaid='未支付'
#5月份數(shù)據(jù)只有7條蛾坯,難以進(jìn)行分析,故刪除5月份數(shù)據(jù)
DELETE FROM `orderinfo` WHERE paidtime>='2016-05-01'
2.1統(tǒng)計(jì)每個(gè)月的下單人數(shù)
SELECT SUBSTRING(paidtime,6,2) '下單月份',COUNT(DISTINCT(`USERID`)) AS '月下單總?cè)藬?shù)'
FROM`orderinfo`
GROUP BY SUBSTRING(paidtime,1,7)
2.2統(tǒng)計(jì)復(fù)購率和回購率
2.2.1? 3月份復(fù)購率
SELECT COUNT(*) AS '下單人數(shù)', COUNT(IF(pt>1,1,NULL)) AS '重復(fù)下單人數(shù)', paidmonth AS '月份', (COUNT(IF(pt>1,1,NULL))/COUNT(*)) AS '復(fù)購率'
FROM (SELECT userid,COUNT(*) AS pt,MONTH(`PAIDTIME`) AS paidmonthFROM orderinfoGROUP BY userid,paidmonth) AS uc
GROUP BY paidmonth
四月份復(fù)購率下降:四月份的下單人數(shù)與復(fù)購率均下滑疏遏,從復(fù)購率來看脉课,不是很高,可將提升的方向主要放在新用戶的獲取财异。
2.2.1? 3月份回購率
#創(chuàng)建3月份下單用戶id與下單次數(shù)的視圖
CREATE VIEW 3_userid
AS
SELECT COUNT(*) AS co1,userid?
FROM `orderinfo`?
WHERE SUBSTRING(paidtime,6,2)='03'?
GROUP BY userid;
#創(chuàng)建4月份下單用戶id與下單次數(shù)的視圖
CREATE VIEW 4_userid AS
SELECT COUNT(*) AS co2,userid
?FROM `orderinfo`?
WHERE? SUBSTRING(paidtime,6,2)='04'
GROUP BY userid;
#.四月份復(fù)購訂單數(shù)
SELECT (SELECT COUNT(co2) AS co3 FROM 4_userid)AS '四月份的訂單數(shù)',COUNT(*) AS '四月份的用戶復(fù)購訂單數(shù)',(COUNT(*)/(SELECT COUNT(co2) AS co3 FROM 4_userid)) AS '復(fù)購率'
FROM 3_userid
INNER JOIN 4_userid
ON 3_userid.userid=4_userid.userid
2.3? 3倘零、4月份每日下單人數(shù),每日下單數(shù)
#3.4月份每日下單人數(shù),每日下單數(shù)
SELECT SUBSTRING(paidtime,6,5) AS '日期',COUNT(DISTINCT(userid)) AS '每日下單人數(shù)',COUNT(*) AS '每日下單人數(shù)'
FROM orderinfo
GROUP BY SUBSTRING(paidtime,6,5);?
#導(dǎo)出sql數(shù)據(jù),用excel透視表制作
周日至周四消費(fèi)者購買數(shù)較活躍戳寸,周五周六較低迷:周五呈驶,周六訂單數(shù)明顯呈下降勢(shì),周六下單人數(shù)為一周中最少疫鹊。2016年第15周的星期一為清明節(jié)袖瞻,許多人外出,訂單數(shù)明顯減少拆吆。
2.4統(tǒng)計(jì)消費(fèi)者性別與消費(fèi)頻次的關(guān)系
思路:建立臨時(shí)表csu統(tǒng)計(jì)每個(gè)用戶的購買次數(shù)及其性別聋迎,用avg和group by分組計(jì)算不同性別的消費(fèi)頻次
SELECT AVG(csu.co2),sex
FROM (SELECT COUNT(o.userid) AS co2,sex,o.userid
FROM `orderinfo` AS o,`user_info_utf` AS u
WHERE o.userid=u.userid AND sex!=' '
GROUP BY o.userid,sex) AS csu?
GROUP BY sex
小結(jié):男性與女性平均購買次數(shù)差別不大,但三四月份下單總?cè)藬?shù)未8萬多枣耀,而下單用戶沒有性別信息的用戶有5萬多砌庄,所以計(jì)算出來的不同性別的平均購買次數(shù)參考意義不大
2.5統(tǒng)計(jì)消費(fèi)者性別與平均消費(fèi)金額的關(guān)系
思路:建立臨時(shí)表csc統(tǒng)計(jì)每個(gè)用戶的購買次數(shù)及其性別,用avg和group by分組計(jì)算不同性別的平均消費(fèi)金額
小結(jié):從數(shù)據(jù)中可看出男性與女性平均購買金額差別不大奕枢,但三四月份下單總?cè)藬?shù)未8萬多,而下單用戶沒有性別信息的用戶有5萬多佩微,所以計(jì)算出來的不同性別的平均購買金額參考意義不大
2.6統(tǒng)計(jì)不同年齡段的消費(fèi)金額的占比
SELECT COUNT(*),ROUND(SUM(`PRINCE`),2) AS '總消費(fèi)額',ROUND(AVG(`PRINCE`),2) AS '平均消費(fèi)額',
CASE
WHEN aget.age BETWEEN 0 AND 9 THEN '0-9歲'
WHEN aget.age BETWEEN 10 AND 19 THEN '10-19歲'
WHEN aget.age BETWEEN 20 AND 29 THEN '20-29歲'
WHEN aget.age BETWEEN 30 AND 39 THEN '30-39歲'
WHEN aget.age BETWEEN 40 AND 49 THEN '40-49歲'
WHEN aget.age BETWEEN 50 AND 59 THEN '50-59歲'
WHEN aget.age BETWEEN 60 AND 69 THEN '60-69歲'
ELSE '70歲及以上'
END 'age_range'
FROM (SELECT o.`PRINCE`,IF(MONTH(NOW())<MONTH(birth) AND DATE(NOW())<DATE(birth),
ROUND(YEAR(NOW())-YEAR(birth)),ROUND(YEAR(NOW())-YEAR(birth))-1) AS age,u.userid
FROM user_info_utf AS u
INNER JOIN orderinfo AS o
ON? u.`userid`=o.`USERID`
WHERE birth>1900-00-00) AS aget? #年齡表
GROUP BY age_range
不同年齡段消費(fèi)總額占比
不同年齡段人數(shù)占比
30-39年齡段的消費(fèi)者為消費(fèi)主力:30-39歲年齡段的消費(fèi)人數(shù)占比為44.4缝彬,消費(fèi)總額占比為45.3,是主要消費(fèi)人群哺眯,需重點(diǎn)關(guān)注谷浅。20-29歲與40-49歲的消費(fèi)總額占比也比較可觀,
2.7 統(tǒng)計(jì)每個(gè)時(shí)間段的下單人數(shù)
SELECT COUNT(*),SUBSTRING(TIME(paidtime),1,2) AS '時(shí)間段'
FROM `orderinfo`
GROUP BY SUBSTRING(TIME(paidtime),1,2)
早上11點(diǎn)和晚上10點(diǎn)是下單高峰:下單時(shí)間主要為上午10點(diǎn)到23點(diǎn)奶卓,上午11-12點(diǎn)與21-23點(diǎn)之間消費(fèi)者較為活躍一疯,19點(diǎn)左右下單人數(shù)明顯下降,這個(gè)時(shí)間段是下班高峰夺姑。
2.8 統(tǒng)計(jì)消費(fèi)的二八法則墩邀,消費(fèi)的top20%用戶,貢獻(xiàn)了多少額度
SELECT SUM(sprince),SUM(sprince)/(SELECT SUM(prince) FROM `orderinfo`) AS 'top20%的用戶消費(fèi)額占總消費(fèi)額的占比'
FROM (SELECT *,(@RowNum := @RowNum + 1) AS RowNum
FROM (SELECT userid,SUM(prince) AS sprince
FROM `orderinfo`
GROUP BY userid
ORDER BY sprince DESC) AS prince_r,(SELECT @RowNum := 0) AS myRows
WHERE @RowNum+1<(SELECT COUNT(DISTINCT(userid))*0.2 AS 'top20%的用戶數(shù)'
FROM `orderinfo`
ORDER BY prince)) AS prince_range
消費(fèi)的top20%用戶貢獻(xiàn)了85%的總消費(fèi)額:從數(shù)據(jù)上可看出top20%用戶對(duì)消費(fèi)總額上的貢獻(xiàn)很大盏浙,應(yīng)重點(diǎn)關(guān)注眉睹,可為其提供更高品質(zhì)荔茬,個(gè)性化的服務(wù)。