1胆绊、取上周日日期和rolling12月的日期:
data timing;
format date1 yymmdd10. date2 yymmdd10.;
date2=intnx('week',today(),0);/上周末亿眠,數(shù)據(jù)截止日/
if mod(year(date2),4)=0 then do;
if (month(date2)100+day(date2))>228 then date1=date2-365;
else date1=date2-364;
end;
else if mod(year(date2)-1,4)=0 then do;
if month(date2)>2 then date1=date2-364;
else date1=date2-365;
end;
else date1=date2-364;/閏年,平年往前R12的date1計(jì)算/
month=year(date2)10000+month(date2)100+day(date2);/數(shù)據(jù)截止日*/
run;
2埂伦、轉(zhuǎn)置:
PROC TRANSPOSE DATA=newcatbycnt out=newcatbycnt1(drop=NAME);
BY CustomerID; /列不變/
ID newcat; /newcat變?yōu)樾?
VAR fag; /fag值/
run;
3、添加最后一筆往前rolling一年的日期oneyearago
data lastandfirst;
set lastandfirst;
format oneyearago yymmdd10.;
if month(lastpurchase) =2 and day(lastpurchase)=29 then do;
oneyearago=mdy(month(lastpurchase),day(lastpurchase)-1,
year(lastpurchase)-1)+1;
end;
else do;
oneyearago=mdy(month(lastpurchase),day(lastpurchase),
year(lastpurchase)-1)+1;
end;
run; /添加最后一筆往前rolling一年的日期oneyearago/
proc sort data=trans;by customer_id;run;
proc sort data=lastandfirst;by customer_id;run;
data trans;
merge trans(in=a) lastandfirst;
by customer_id;
if a;
oneyear=0;
if oneyearago<=purchasetime<=lasttime then oneyear=1;
run;
/************************************************************************/
4、/*********計(jì)算最后一筆往前rolling一年的av am af ipt************/
%macro rfm(input,timevar,output);
proc sql;
create table a1 as
select customer_id,purchasetime,sum(price) as salesbyday,sum(unit) as ui
from &input where &timevar=1 group by 1,2 having sum(price) ne 0;
quit;
proc sort data=a1;by customer_id purchasetime;run;
data &output(keep=customer_id av am af ipt);
set a1;
by customer_id;
if first.customer_id then do;
f=0;
v=0;
u=0;
end;
if salesbyday>0 then q=1;
else q=-1;
f+q;
v+salesbyday;
u+ui;
if last.customer_id then do;
if f=0 and v>0 then f=1;
if f ne 0 then do;
m=v/f;
ipt=u/f;
end;
av=v;
am=m;
af=f;
if f>0 then output;
end;
run;
%mend;
%rfm(trans,oneyear,rfm_1year);
/************************************************************************/
5距糖、導(dǎo)出CSV:
proc export data=result
outfile='D:\Lancome_ec\報(bào)表\sampler.csv' dbms=csv replace;
run;
6娶吞、導(dǎo)入CSV:
%macro importcsv(dataset,path);
proc import datafile=&path out=&dataset;run;
%mend;
%importcsv(dataset=listwave,path="D:\Lancome_ec\報(bào)表\listwave.csv");
7垒迂、導(dǎo)入excel:
PROC IMPORT OUT= WORK.recommend_rule
DATAFILE= "E:\EC\kie\ff-test\recommend_rule.xlsx"
DBMS=EXCEL REPLACE;
SHEET="Sheet1$";
GETNAMES=YES;
RUN;
8、導(dǎo)出excel:
PROC EXPORT DATA=&input
OUTFILE= "E:\EC\kie\ff-test\result&output..xls"
DBMS=EXCEL REPLACE label;
SHEET="sheet1";
RUN;
9妒蛇、/**************************excel多表導(dǎo)出*****************************/
方法1:
%macro report(input,output);
PROC EXPORT DATA=&input
OUTFILE= "E:\EC\kie\ff-test\定期項(xiàng)目\TMALL先試后買\Review\review&EC_date..xlsx"
DBMS=EXCEL REPLACE label;
SHEET="&output.";
RUN;
%mend;
%report(S2p_p2,先試后買推薦結(jié)果);
%report(P2p_p2,正裝推薦結(jié)果);
%report(Productnum,正裝回購(gòu)人數(shù));
%report(Samplenum,先試后買回購(gòu)人數(shù));
%report(p_time,正裝回購(gòu)參考日期);
%report(s_time,先試后買回購(gòu)參考日期);
方法2:
libname xlout excel "E:\EC\kie\ff-test\定期項(xiàng)目\TMALL先試后買\Review\review&EC_date..xlsx";
proc datasets lib=xlout kill;run;
data xlout.先試后買推薦結(jié)果;set S2p_p2;run;
data xlout.正裝推薦結(jié)果;set P2p_p2;run;
data xlout.正裝回購(gòu)人數(shù);set Productnum;run;
data xlout.先試后買回購(gòu)人數(shù);set Samplenum;run;
data xlout.正裝回購(gòu)參考日期;set p_time;run;
data xlout.先試后買回購(gòu)參考日期;set s_time;run;
libname xlout clear;
/***********************************************************************/
10机断、SAS 中可以利用PROC suveryselect 過(guò)程實(shí)現(xiàn)各種抽樣
其一般形式是:
PROC SURVEYSELECT
data=<源數(shù)據(jù)集名>
method = <srs l urs l sys >
out=<抽取樣本存放的數(shù)據(jù)集>
n=<抽取數(shù)量>(or samprate=抽樣比例)
seed =n;
strata <指定分層變量>;
id <指定抽取的樣本所保留的源數(shù)據(jù)集變量>;
run;
說(shuō)明:method用來(lái)指定隨機(jī)抽樣方法的楷拳,其中SRS是指不放回簡(jiǎn)單隨機(jī)抽樣(Simple Random Samping);urs是指放回簡(jiǎn)單隨機(jī)抽樣(Unrestricted Random Sampling)吏奸;sys是指系統(tǒng)抽樣(Systematic Sampling)欢揖。
seed用來(lái)指定隨機(jī)種子數(shù),為非負(fù)整數(shù)苦丁,取0則每次抽取的樣本不同浸颓,若取大于0的整數(shù),則下次抽樣時(shí)若輸入相同值即可得到相同的樣本旺拉;
id是指定從源數(shù)據(jù)集復(fù)制到樣本數(shù)據(jù)集的變量产上,若缺省,則復(fù)制所有變量蛾狗。
例子:
a晋涣、簡(jiǎn)單無(wú)重復(fù)隨機(jī)抽樣舉例:
/按30%的比例從test數(shù)據(jù)集中抽取樣本,并把樣本輸出到results數(shù)據(jù)集中/
proc surveyselect data=test1 out=results1 method=srs samprate=0.3;
run;
b沉桌、分層等比例隨機(jī)抽樣舉例谢鹊;
proc sort data=test2;
by 分層變量;
run; /先用分層變量對(duì)總體樣本進(jìn)行排序/
proc surveyselect data=test2 out=results2 method=srs samprate=0.1;
strata 分層變量;
run; /根據(jù)分層變量等比例從總體中抽取樣本/
c、分層不等比例抽樣舉例留凭;
(1)手工設(shè)置抽樣比例或者抽樣數(shù)
proc sort data=test3;
by 分層變量;
run; /先用分層變量對(duì)總體樣本進(jìn)行排序/
proc surveyselect data=test3 out=results3 method=srs
samprate=(0.1,0.3,0.5,0.2); /根據(jù)分層情況設(shè)置每一層要抽取的比例/
strata 分層變量;
run; /根據(jù)分層變量不等比例從總體中抽取樣本/
proc surveyselect data=test3 out=results3 method=srs
n=(30,20,50,40); /根據(jù)分層情況設(shè)置每一層要抽取的樣本數(shù)/
strata 分層變量;
run;
(2)根據(jù)抽樣表進(jìn)行不等比例抽樣
proc sort data=test3;
by 分層變量;
run; /先用分層變量對(duì)總體樣本進(jìn)行排序/
proc surveyselect data=test3 out=results3 method=SRS
samprate=samp_table; /通過(guò)抽樣比例數(shù)據(jù)集進(jìn)行抽樣佃扼,samp_table數(shù)據(jù)集中要包括分層變量 以及每一分層對(duì)應(yīng)的抽樣比例或者數(shù)量,如果按比例抽樣變量必須用rate來(lái)命名抽樣比例,如果是按數(shù)量抽樣必須用nsize來(lái)命名抽樣數(shù)量/
strata 分層變量;
run;
11蔼夜、輸出由freq出的類別的頻數(shù)和百分比構(gòu)成的表
ods csv file="G:\EC\KIE\online datamart&date\datamart_對(duì)比.csv";
/輸出datamart_對(duì)比表兼耀,總表中control和test包括以下6個(gè)量/
proc freq data=t_test;table cluster;run;
proc freq data=c_control;table cluster;run;
proc freq data=t_test;table auscat;run;
proc freq data=c_control;table auscat;run;
proc freq data=t_test;table Frequency_online;run;
proc freq data=c_control;table Frequency_online;run;
ods csv close;
12.官網(wǎng)datamart字段
最后一筆往前rolling一年時(shí)間段:
Frequency_online 、AnnualSpending_online 求冷、AUS_online 瘤运、ipt_online
數(shù)據(jù)截止日期往前rolling一年時(shí)間段:
Frequency_online_r12 、AnnualSpending_online_r12 匠题、ipt_r12 am=AUS_online_r12
status字段中:
new_r12:首筆購(gòu)買在數(shù)據(jù)截止日到往前rolling一年的時(shí)間內(nèi)拯坟;
newrepeat_r12:首筆購(gòu)買在數(shù)據(jù)截止日到往前rolling一年的時(shí)間內(nèi),并且次數(shù)>1韭山;
Winback_R12:最后一筆購(gòu)買在數(shù)據(jù)截止日到往前rolling一年的時(shí)間內(nèi)郁季,并且最后一筆距離倒數(shù)第二筆超過(guò)365天;
Existing_r12:數(shù)據(jù)截止日期往前rolling一年時(shí)間段內(nèi)有購(gòu)買钱磅、早于數(shù)據(jù)截止日期往前rolling一年有過(guò)購(gòu)買巩踏,并且不是Winback_R12的人;
Winback_YTD:最后一筆在今年購(gòu)買续搀,第一筆在去年之前塞琼,并且去年沒(méi)有購(gòu)買過(guò);
new_ytd:第一筆購(gòu)買在今年禁舷;
Existing_YTD:去年線上購(gòu)買過(guò)彪杉,今年線上又購(gòu)買毅往;
Active:最后一筆距離數(shù)據(jù)截止日的時(shí)間在360天以內(nèi);sleeping:最后一筆距離數(shù)據(jù)截止日的時(shí)間在361天到720天之間派近;
Lost:最后一筆距離數(shù)據(jù)截止日的時(shí)間在721天到1080天之間攀唯;
Lapsed:最后一筆距離數(shù)據(jù)截止日的時(shí)間超過(guò)1081天;
cluster字段:
cluster:最后一筆往前rolling一年的人購(gòu)買產(chǎn)品類別的cluster渴丸;
cluster_all:數(shù)據(jù)截止日往前rolling一年的人購(gòu)買產(chǎn)品類別的cluster侯嘀;