如何使用SAS提取之后12个月的数据

时间:2017-04-30 06:54:36

标签: sas

我目前正在使用两个SAS数据集(样本数据集和主数据集)进行SAS编程。以下是为说明问题而创建的假设(虚拟)数据。我想从主数据集(test)中提取样本数据集中各个id的数据。我给出了一个只包含少量id的样本数据集作为示例,需要根据每个id的yearmonth(年月)信息,从主表(test)中提取其后12个月的信息(所需输出见下文的第三个表)。

以下是提取之前12个月数据的代码,但我不知道如何像提取之前月份那样提取之后12个月的记录。有谁能帮我用较为优化的SAS程序来解决这个问题吗?

/* Put the master data in chronological order within each id so the
   rolling window below receives the months oldest-first. */
proc sort data=test;
    by id yearmonth;
run;

/* Build RESULT: every master row plus PREV_MONTH_1..PREV_MONTH_12, the
   NO_OF_CUST values of the 12 rows that precede it within the same id.
   The window is positional (row-based); slots for which no earlier row
   exists keep the initial value 0. */
data result;
    set test;
    by id;

    /* Slot 1 (PREV_MONTH_0) holds the current row, slot 13 the oldest. */
    array prev_month {13} PREV_MONTH_0-PREV_MONTH_12;
    /* Carry the window across rows instead of resetting it each iteration. */
    retain PREV_MONTH_0-PREV_MONTH_12;

    /* A new id starts with an empty (all-zero) window. */
    if first.id then do slot = 1 to dim(prev_month);
        prev_month[slot] = 0;
    end;

    /* Age every stored value by one position, oldest slot first so that
       nothing is overwritten before it has been copied. */
    do slot = dim(prev_month) to 2 by -1;
        prev_month[slot] = prev_month[slot - 1];
    end;

    /* The current row enters the window at position 0. */
    prev_month[1] = no_of_cust;

    /* PREV_MONTH_0 merely duplicates NO_OF_CUST, so it is not kept. */
    drop slot PREV_MONTH_0;
run;

/* SAMPLE1: the sample keys (id, yearmonth) sorted for the merge.
   NO_OF_CUST is removed here because RESULT supplies it. */
proc sort data=sample(drop=no_of_cust) out=sample1;
    by id yearmonth;
run;

/* ALL: attach the RESULT columns to each sample key and keep only the
   rows that originate from the sample. */
data all;
    merge sample1(in=in_sample) result;
    by id yearmonth;
    if not in_sample then delete;
run;

样本数据集(数据集名称:sample)。

ID  YEARMONTH   NO_OF_CUST
1    200909        50
1    201005        65
1    201008        78
1    201106        95
2    200901        65
2    200902        45
2    200903        69
2    201005        14
2    201006        26
2    201007        98

主数据集(数据集名称:test)。这是一个庞大的数据集,包含每个ID从开户起至今的各个yearmonth(年月)的数据。

ID  YEARMONTH   NO_OF_CUST
1   200808        125
1   200809        125
1   200810        111
1   200811        174
1   200812        98
1   200901        45
1   200902        74
1   200903        73
1   200904        101
1   200905        164
1   200906        104
1   200907        22
1   200908        35
1   200909        50
1   200910        77
1   200911        86
1   200912        95
1   201001        95
1   201002        87
1   201003        79
1   201004        71
1   201005        65
1   201006        66
1   201007        66
1   201008        78
1   201009        88
1   201010        54
1   201011        45
1   201012        100
1   201101        136
1   201102        111
1   201103        17
1   201104        77
1   201105        111
1   201106        95
1   201107        79
1   201108        777
1   201109        758
1   201110        32
1   201111        15
1   201112        22
2   200711        150
2   200712        150
2   200801        44
2   200802        385
2   200803        65
2   200804        66
2   200805        200
2   200806        333
2   200807        285
2   200808        265
2   200809        222
2   200810        220
2   200811        205
2   200812        185
2   200901        65
2   200902        45
2   200903        69
2   200904        546
2   200905        21
2   200906        256
2   200907        214
2   200908        14
2   200909        44
2   200910        65
2   200911        88
2   200912        79
2   201001        65
2   201002        45
2   201003        69
2   201004        54
2   201005        14
2   201006        26
2   201007        98

所需的输出应如下所示,

ID  YEARMONTH   NO_OF_CUST  AFTER_MONTH_1   AFTER_MONTH_2   AFTER_MONTH_3   AFTER_MONTH_4   AFTER_MONTH_5   AFTER_MONTH_6   AFTER_MONTH_7   AFTER_MONTH_8   AFTER_MONTH_9   AFTER_MONTH_10  AFTER_MONTH_11  AFTER_MONTH_12
1     200909        50         77              86                  95          95              87                79                71               65           66           66                78                88

1 个答案:

答案 0 :(得分:0)

第1步:将样本表(sample)与主表(test)连接(join),并使用intnx取出之后12个月内的所有值(注意:intnx要求yearmonth是SAS日期值)。
第2步:生成形如 after_month_N 的列名。
第3步:转置以得到最终输出。

/* Step 1: pair every sample row with the master rows of the same id whose
   month lies between the sample month and the sample month + 12 months.
   The left join keeps sample rows that have no match in the master.
   NOTE(review): INTNX works on SAS date values, so this join assumes
   YEARMONTH is stored as a real SAS date that is only displayed as
   200909 (e.g. read with the YYMMN6. informat). If it is stored as the
   plain number 200909 the upper bound is wrong and the value must be
   converted to a date first -- confirm against the actual data. */
proc sql;
create table  abc as
select a.id,a.yearmonth,b.yearmonth as yearmonth1, b.no_of_cust
from 
sample a
left join
test b
on a.id = b.id  and a.yearmonth <= b.yearmonth <= intnx("month",a.yearmonth,12)
order by a.id,a.yearmonth,b.yearmonth;
quit;

/* Step 2: number the joined rows 0, 1, 2, ... within each (id, yearmonth)
   group and turn that number into a column name after_month_<n>.
   NOTE(review): the numbering is positional, so it presumably relies on
   the master holding exactly one row per month with no gaps -- verify. */
data abc1(drop=col yearmonth1);
set abc;
by id yearmonth;
/* COL is created by the sum statement below and is therefore retained;
   starting at -1 makes the first row of each group number 0. */
if first.yearmonth then col=-1;
col+1;
/* CATS converts the number and strips blanks itself, which avoids the
   implicit numeric-to-character conversion (and its log NOTE) that the
   "||" operator would trigger. 32 is the maximum length of a SAS name. */
length columns $ 32;
columns = cats("after_month_", col);
run;

/* Step 3: one output row per (id, yearmonth); the values of COLUMNS
   become the variable names. after_month_0 is the sample month itself,
   so it is renamed back to no_of_cust. */
proc transpose data=abc1 out=abc2(rename=(after_month_0 = no_of_cust) drop=_name_);
by id yearmonth;
id columns;
var no_of_cust;
run;

我的输出:
(此处原为输出结果的截图,图片未保留。)


如果您想在自己原有代码的基础上进行修改,则可以使用以下代码。

/* Sort the master data newest-first within each id: read in this order,
   the rows "before" the current one are the later months. */
proc sort data=test;
    by id descending yearmonth;
run;

/* Build RESULT: every master row plus after_MONTH_1..after_MONTH_12, the
   NO_OF_CUST values of the 12 rows read just before it within the same id
   (i.e. the following months, given the descending sort). The window is
   positional (row-based); slots with no such row keep the initial value 0. */
data result;
    set test;
    by id;

    /* Slot 1 (after_MONTH_0) holds the current row, slot 13 the farthest. */
    array after_month {13} after_MONTH_0-after_MONTH_12;
    /* Carry the window across rows instead of resetting it each iteration. */
    retain after_MONTH_0-after_MONTH_12;

    /* A new id starts with an empty (all-zero) window. */
    if first.id then do slot = 1 to dim(after_month);
        after_month[slot] = 0;
    end;

    /* Push every stored value one position further out, last slot first
       so that nothing is overwritten before it has been copied. */
    do slot = dim(after_month) to 2 by -1;
        after_month[slot] = after_month[slot - 1];
    end;

    /* The current row enters the window at position 0. */
    after_month[1] = NO_OF_CUST;

    /* after_MONTH_0 merely duplicates NO_OF_CUST, so it is not kept. */
    drop slot after_MONTH_0;
run;

/* RESULT was built in descending month order; put it back in ascending
   order so it can be merged by id and yearmonth. */
proc sort data=result;
    by id yearmonth;
run;

/* SAMPLE1: the sample keys (id, yearmonth) sorted for the merge.
   NO_OF_CUST is removed here because RESULT supplies it. */
proc sort data=sample(drop=no_of_cust) out=sample1;
    by id yearmonth;
run;

/* ALL: attach the RESULT columns to each sample key and keep only the
   rows that originate from the sample. */
data all;
    merge sample1(in=in_sample) result;
    by id yearmonth;
    if not in_sample then delete;
run;

如有任何疑问,请与我联系。