我正在尝试从下面的数据集(test2)中随机抽取 6 个案例。案例必须按 ED、CCM、MAT、CAC 的特定顺序选取:第一轮从付款人 1(P1)中选取,第二轮从付款人 2(P2)中选取,直到总共得到 6 个案例为止。在每种度量类型内,优先选取随机数最小的案例。下面是我目前使用的宏,我希望它能按付款人重复执行。
ID Measure Payer
1439 CAC P1
1135 CCM P1
1736 ED P1
1737 MAT P1
1738 CCM P2
2351 ED P2
4251 ED P1
/* Route every row of test2 into the dataset named after its measure.
   Rows whose measure is none of the four listed values are written nowhere. */
data CAC CCM ED MAT;
    set test2;
    select (measure);
        when ('CAC') output CAC;
        when ('CCM') output CCM;
        when ('ED')  output ED;
        when ('MAT') output MAT;
        otherwise;   /* unlisted measure: row is dropped */
    end;
run;
/* %select(dsn, num): reduce &dsn to its first &num rows after ordering by
   Payer and then by a per-row random key.
     dsn - name of the dataset to sample; it is overwritten in place
     num - number of rows to keep */
%MACRO select (dsn,num);
    /* Attach a uniform random sort key to every row. MIN is documented as
       needing at least two arguments, so min(random) could not serve here. */
    DATA &dsn;
        set &dsn;
        min_random=ranuni(0);
    RUN;
    /* Lowest Payer first; within a Payer, lowest random key first. */
    PROC SORT data=&dsn;
        by Payer min_random;
    RUN;
    /* Subsetting IF: keep only the first &num rows of the sorted data. */
    DATA &dsn;
        set &dsn;
        if _N_ le &num;
    RUN;
%MEND select;
/* Trim each per-measure dataset to a single row, in the required
   measure order ED, CCM, MAT, CAC. */
%select(ED,1);
%select(CCM,1);
%select(MAT,1);
%select(CAC,1);

/* Stack the trimmed datasets in that same order. */
data sample1A;
    set ED
        CCM
        MAT
        CAC;
run;
对于上面的示例数据集,这 6 个案例的输出应为:
1736 ED P1
1135 CCM P1
1737 MAT P1
1439 CAC P1
2351 ED P2
1738 CCM P2
答案 0 :(得分:0)
我尝试用以下方式解决这个问题。
代码如下,基本上不言自明。
首先创建示例数据集:
/* Build the sample dataset test2 from in-stream records:
   a numeric ID followed by character Measure and Payer codes. */
data test2;
    infile datalines;
    input ID
          Measure $
          Payer   $;
datalines;
1439 CAC P1
1135 CCM P1
1736 ED P1
1737 MAT P1
1738 CCM P2
2351 ED P2
4251 ED P1
;
run;
/* Fan test2 out into one dataset per measure. A row whose measure is not
   CAC, CCM, ED or MAT is not written to any of them. */
data CAC CCM ED MAT;
    set test2;
    select (measure);
        when ('CAC') output CAC;
        when ('CCM') output CCM;
        when ('ED')  output ED;
        when ('MAT') output MAT;
        otherwise;
    end;
run;
/* %select(dsn, num): pick &num rows from &dsn and append them to sample1A.
     dsn - name of the dataset to sample; it is overwritten in place and
           afterwards holds only the rows that were picked
     num - number of rows to pick
   Rows are ordered by Payer, then by a fresh uniform random key, and the
   first &num rows in that order are the ones picked. */
%MACRO select (dsn,num);
    /* Give every row a random sort key (seed 0 = clock-based stream). */
    DATA &dsn;
        set &dsn;
        min_random=ranuni(0);
    RUN;
    PROC SORT data=&dsn;
        by Payer min_random;
    RUN;
    /* Subsetting IF: keep only the first &num rows of the sorted data. */
    DATA &dsn;
        set &dsn;
        if _N_ le &num;
    RUN;
    /* NOTE(review): &dsn now contains only the picked rows, so calling
       %select again on the same dataset re-appends those same rows
       instead of drawing new ones - confirm this is acceptable. */
    proc append base=sample1A data=&dsn. force;
    run;
%MEND select;
/* %loop: draw Num_of_records_to_extract records by calling %SELECT once per
   pick, cycling through the measures in the order listed in inp.
   Full passes over the measure list run first; a final partial pass
   covers the remainder, starting again from the first measure. */
%macro loop;
    /* Keep all working variables local so the caller's macro variables
       of the same name are never overwritten. */
    %local inp Num_of_records_to_extract Num_of_distinct_measure
           loop_count semi_loop_count i j k;

    %let inp=ED,CCM,MAT,CAC;
    %let Num_of_records_to_extract=6;
    /* Derive the measure count from the list so the two cannot disagree. */
    %let Num_of_distinct_measure=%sysfunc(countw(%superq(inp),%str(,)));

    /* Number of full passes (integer division) and leftover picks. */
    %let loop_count=%eval(&Num_of_records_to_extract./&Num_of_distinct_measure.);
    %let semi_loop_count=%sysfunc(mod(&Num_of_records_to_extract.,&Num_of_distinct_measure.));

    /* Full passes: one pick per measure. A %DO range whose end is below
       its start runs zero times, so no separate guard is needed. */
    %do i=1 %to &loop_count.;
        %do j=1 %to &Num_of_distinct_measure.;
            %SELECT(%scan(%superq(inp),&j.,%str(,)),1);
        %end;
    %end;

    /* Partial pass: one pick for each of the first semi_loop_count measures. */
    %do k=1 %to &semi_loop_count.;
        %SELECT(%scan(%superq(inp),&k.,%str(,)),1);
    %end;
%mend;
%loop;
答案 1 :(得分:0)
下面是我的实现方式。
/* Rewrite test2 in place, adding two sort keys:
     ran   - uniform random number from the seed-123 stream
     order - rank of the measure in the required sequence ED, CCM, MAT, CAC;
             left missing for any other measure */
data test2;
    set test2;
    ran = ranuni(123);
    select (measure);
        when ('ED')  order = 1;
        when ('CCM') order = 2;
        when ('MAT') order = 3;
        when ('CAC') order = 4;
        otherwise;
    end;
run;
/* P1: the payer-P1 rows of test2, ordered by order, payer, ran. */
proc sort data=test2(where=(payer='P1')) out=P1;
    by order payer ran;
run;

/* P2: the payer-P2 rows of test2, in the same ordering. */
proc sort data=test2(where=(payer='P2')) out=P2;
    by order payer ran;
run;
/* For payer P1: the first row of each order group (lowest ran) goes to S1,
   every other row goes to S3. */
data S1 S3;
    set P1;
    by order ran;
    if not first.order then output S3;
    else output S1;
run;

/* Same split for payer P2: group leaders to S2, the rest to S4. */
data S2 S4;
    set P2;
    by order ran;
    if not first.order then output S4;
    else output S2;
run;
/* Stack P1 leaders, P2 leaders, then the P1 and P2 leftovers, and keep
   only the first 6 rows of that concatenation. */
data sample;
    set S1 S2 S3 S4;
    if _n_ > 6 then stop;   /* rows 1-6 were already written; quit here */
run;
答案 2 :(得分:0)
下面是一种不那么繁琐的做法。只要能用 BY 组处理代替宏循环,就应该这样做。在这个问题中,没有必要为此把数据集拆分成许多部分;如果数据集很大,这样做的 I/O 开销会很高。
下面是一个易于维护的解决方案(未经测试):
/* Number of records wanted in the final sample. */
%let SAMPLE_SIZE = 6;

/* Character format mapping each measure to its position in the required
   selection order; any other value maps to 'X'. */
proc format;
    value $measure2order
        'ED'  = '1'
        'CCM' = '2'
        'MAT' = '3'
        'CAC' = '4'
        other = 'X';
run;
/* The output gets a new dataset name because records with unexpected
   values are tossed here - more specifically, measures that are not
   needed for this particular sample. Those rows go to ODD_STUFF.
   Each row also receives a random key (seed 123) and the sort code
   produced by the $measure2order format. */
data SAMPLE ODD_STUFF;
    set test2;
    random = ranuni(123);
    order  = put(measure, $measure2order.);
    if order ne 'X' then output SAMPLE;
    else output ODD_STUFF;
run;
/* Order the candidates by payer, then measure order, then random key. */
proc sort data=SAMPLE;
    by payer order random;
run;

/* Keep the first row of every payer/order group (its lowest random key)
   and stop as soon as &SAMPLE_SIZE rows have been written. The working
   variables random and sample_count are dropped from the output. */
data SAMPLE(drop=random sample_count);
    set SAMPLE;
    by payer order random;
    if not first.order then delete;
    sample_count + 1;            /* running count of rows kept so far */
    output;
    if sample_count >= &SAMPLE_SIZE then stop;
run;
(顺便提一下,这里的问题定义似乎有些奇怪。假设数据完整,你的样本将包含付款人 P1 的 4 条随机记录(每个度量各 1 条),以及付款人 P2 前两个度量(ED 和 CCM)各 1 条随机记录。)