SAS:在数据步(DATA step)中使用 BY 组处理

时间:2014-03-09 17:39:02

标签: sas datastep

如何汇总以下样本数据,以得到客户级别的计算结果?我正在数据步中使用 BY 组处理,但不确定是否应该把它拆分成两个数据步。

我需要为每个玩家提取:第一个 type、第一个 price、不同 type 的个数、不同 price 的个数、足球投注次数以及棒球投注次数。

我似乎无法在同一个数据步中同时处理 type 和 price。

/* Sample bet-level table: one row per bet placed by a player. */
data have;
    /* List input; the colon modifier lets the DATETIME. informat read
       the timestamp up to the next blank. */
    input username $
          betdate  : datetime.
          stake
          type     $
          price
          sport    $;

    /* Calendar-day portion of the bet timestamp. */
    dateOnly = datepart(betdate);

    format betdate  datetime.
           dateOnly ddmmyy8.;
    datalines;
player1 12NOV2008:12:04:01 90 SGL 5 SOCCER
player1 04NOV2008:09:03:44 30 SGL 4  SOCCER
player2 07NOV2008:14:03:33 120 SGL 5 SOCCER
player1 05NOV2008:09:00:00 50 SGL 4 SOCCER
player1 05NOV2008:09:05:00 30 DBL 3 BASEBALL 
player1 05NOV2008:09:00:05 20 DBL 4 BASEBALL 
player2 09NOV2008:10:05:10 10 DBL 5 BASEBALL 
player2 15NOV2008:15:05:33 35 DBL 5 BASEBALL 
player1 15NOV2008:15:05:33 35 TBL 5 BASEBALL
player1 15NOV2008:15:05:33 35 SGL 4 BASEBALL
;
run;

/* Show the rows that were just read in. */
proc print data=have;
run;

/* Asker's attempt: collapse the bet-level rows in HAVE to one summary row per
   username. The rows are sorted by player, day, timestamp, type and price. */
proc sort data=have; by username dateonly betdate type price; run;
data want;
set have;
/* Carry the running summary values from row to row instead of resetting them. */
retain typecount pricecount firsttype firstprice soccercount baseballcount;
/* FIRST./LAST. flags are created only for the variables named here:
   username, dateonly and betdate. */
by username dateonly betdate;   
/* eventTime numbers the distinct bet timestamps within a player (1, 2, ...);
   rows sharing the same betdate share the same eventTime. */
if first.username then eventTime = 0;
if first.betdate then eventTime + 1;

/* Per-sport bet counts, restricted to the player's first five events.
   Any row that is not SOCCER is counted as baseball. */
if first.username then soccercount=0;
if first.username then baseballcount=0;
if index(upcase(sport),'SOCCER') and eventtime <=5 then soccercount+1;
else if eventtime <=5 then baseballcount+1;

/* Both branches perform the same assignment, so this is equivalent to
   "if eventtime = 1 then firsttype = type". When several rows share the
   first betdate, the last of them wins. */
if first.username and eventtime =1 then firsttype=type;
else if eventtime =1 then firsttype=type;

/* Same pattern as above, for the price of the first event. */
if first.username and eventtime =1 then firstprice=price;
else if eventtime =1 then firstprice=price;

/* NOTE(review): type is not on the BY statement above, so no FIRST.type flag
   is created for it; this reference is presumably treated as an uninitialized
   variable and typecount never increments - confirm in the log. */
if first.username then typecount=0;
if first.type then typecount+1;

/* NOTE(review): same concern for FIRST.price - price is not a BY variable in
   this step. The count is also limited to the first five events. */
if first.username then pricecount=0;
if first.price and eventtime <=5 then pricecount+1;

/* Emit a single row per player, taken at the player's last input row. */
IF last.username THEN OUTPUT;
keep username soccercount baseballcount firsttype firstprice typecount pricecount;
run;
proc print;run;

1 个答案:

答案 0 :(得分:1)

下面的代码应该可以在一个数据步中完成您的要求:

/* Order the bets chronologically within each player so that FIRST.username
   identifies the player's earliest bet. (The original statement read
   "by by username ...", which makes PROC SORT look for a variable named BY.) */
proc sort data=have;
    by username dateonly betdate;
run;

/* Collapse the bet-level rows in HAVE to one summary row per username:
     firsttype / firstprice      - type and price of the player's earliest bet
     typecount / pricecount      - number of distinct non-missing types / prices
     soccercount / baseballcount - number of bets whose sport contains that word
   TYPELIST and PRICELIST are working variables holding the "|"-delimited
   values seen so far for the current player; they are dropped from the output. */
data want(drop= betdate dateonly stake type price sport TYPELIST PRICELIST);
    set have;
    /* Arbitrary large length; a player with more distinct values than fit in
       200 characters would be truncated. */
    length TYPELIST PRICELIST $200;

    retain firsttype firstprice TYPELIST typecount PRICELIST pricecount soccercount baseballcount;
    by username dateonly betdate;

    /* Start of a new player: capture the earliest bet and reset the accumulators. */
    if first.username then do;
        firsttype  = type;
        firstprice = price;
        typecount = 0;
        pricecount = 0;
        soccercount = 0;
        baseballcount = 0;
        TYPELIST = "";
        PRICELIST = "";
    end;

    if index(upcase(sport), 'SOCCER')   then soccercount + 1;
    if index(upcase(sport), 'BASEBALL') then baseballcount + 1;

    /* FINDW matches whole "|"-delimited entries, so a type that is a substring
       of an earlier one is still recognised as new. The count is bumped at the
       moment a new value is appended, so an empty list yields 0. */
    if not missing(type) then do;
        if findw(TYPELIST, strip(type), '|', 'it') = 0 then do;
            TYPELIST = catx('|', TYPELIST, type);
            typecount + 1;
        end;
    end;

    /* CATS renders the numeric price as text without leading blanks, so the
       lookup compares the whole value (the original FINDC tested individual
       characters, which miscounts multi-digit prices such as 45 after 4 and 5). */
    if not missing(price) then do;
        if findw(PRICELIST, cats(price), '|', 'it') = 0 then do;
            PRICELIST = catx('|', PRICELIST, price);
            pricecount + 1;
        end;
    end;

    /* One output row per player, written at the player's last bet. */
    if last.username then output;
run;

proc print data=want;
run;