我的数据集共有81列:人员ID、79个二进制变量和一个成本变量:
id h1 h2 h3 ... h79 cost
1 1 0 1 1 15
2 1 1 1 1 80
3 0 1 1 0 10
...
每个人员ID对应一行记录。现在我想找出哪些由两个h(二进制)变量组成的组合,其取值同时为1的唯一人员ID超过50个;如果满足这一条件,就计算这些人员的总成本。我想一个可行的办法是创建一个包含所有h变量的数组,并使用两层DO循环?但是,如果我想查看三个、四个或五个变量的组合该怎么办?另外,我该如何存储变量名的组合,以便知道某个变量组合对应的总成本?我设想最终的输出结果如下:
combinations total cost
h1&h3 95
h2&h3 90
h1&h2&h3 80
谢谢你的帮助!
答案 0 :(得分:0)
听起来你只需要使用PROC SUMMARY即可。
/* Sample input: one row per person id. The five values after id are read */
/* as h1, h2, h3, h79 and cost, in that order (list input).               */
data have;
  input id h1 h2 h3 h79 cost;
  datalines;
1 1 0 1 1 15
2 1 1 1 1 80
3 0 1 1 0 10
;
/* Sum COST for every crossing of the class variables h1, h2 and h3.      */
/* CHARTYPE stores _TYPE_ as a character string of 0/1 digits, one digit  */
/* per class variable, marking which variables define each output row.    */
proc summary data=have chartype;
  class h1 h2 h3;
  var cost;
  output out=cost_summary sum(cost)=cost;
run;
但您只关心所有参与分组的CLASS变量取值均为1的那些结果行。
/* Print only the summary rows whose participating class variables are    */
/* all 1. MIN() skips missing values, so class variables that are not     */
/* part of a given crossing (shown as .) do not affect the comparison.    */
proc print data=cost_summary(where=(min(h1,h2,h3) = 1));
run;
结果:
Obs h1 h2 h3 _TYPE_ _FREQ_ cost
2 . . 1 001 3 105
4 . 1 . 010 2 90
6 . 1 1 011 2 90
8 1 . . 100 2 95
10 1 . 1 101 2 95
13 1 1 . 110 1 80
16 1 1 1 111 1 80
答案 1 :(得分:0)
DATA步可以使用ALLCOMB或ALLCOMBI例程来遍历大小为n的数组的所有k元子集组合。可以使用哈希对象,为每个满足“所有标志均为真”条件的k元子集累计计数和总成本。
options mprint;

/* Simulated test data: three ids, each with 79 pseudo-random 0/1 flags   */
/* (flag01-flag79) and a pseudo-random integer cost.                      */
data have (drop=j);
  do id = 1 to 3;
    array flag(79) flag01-flag79;
    /* each flag is the 0/1 result of comparing a uniform draw with 0.5 */
    do j = 1 to dim(flag);
      flag(j) = (ranuni(1) < 0.5);
    end;
    cost = ceil(10 + 100 * ranuni(123));
    output;
  end;
run;
示例:
/*---------------------------------------------------------------------------
  Macro track_all_true(K=)

  Generates DATA step statements (it must be invoked inside a DATA step) that,
  for the observation currently in the PDV, walk through every K-subset of the
  flag array X using CALL ALLCOMBI. For each subset whose flags are all true,
  the variable names of the subset are placed in var1..varK (array COMBO) and
  the hash object ALL_TRUE is updated with a running COUNT and COST_SUM.

  Expects the calling step to have defined:
    x         - array of the flag variables
    n         - dim(x)
    combo     - character array over var1-var5 (the hash key variables)
    all_true  - hash keyed on var1-var5 with data var1-var5, count, cost_sum
    cost      - cost of the current observation

  The definition lives outside the DATA step so that the step body contains
  only the four invocations.
---------------------------------------------------------------------------*/
%macro track_all_true(K=);
  array index&K._[&K];
  call missing (of index&K._[*]); %* reset search tracking variables;
  call missing (of combo[*]);     %* reset search tracking variables;

  %* search all combinations for those that are all true;
  do p = 1 to comb(n,&K);
    call allcombi(n, &K, of index&K._[*], add, remove);

    %* check each item in the combination;
    /* empty loop body: q stops at the first false flag, or ends at K+1 */
    do q = 1 to &K while(x[index&K._[q]]);
    end;

    if q > &K then do; %* each item was true;
      /* build the hash key from the variable names of the subset */
      do q = 1 to &K;
        which_index = index&K._[q];
        which_var = vname( x[which_index] );
        combo(q) = which_var;
      end;

      if all_true.find() ne 0 then do; %* track first occurrence of the combination;
        cost_sum = cost;
        count = 1;
        all_true.add();
      end;
      else do; %* accumulate count and cost information for the combination;
        /* find() loaded the stored count and cost_sum into the PDV */
        cost_sum + cost;
        count + 1;
        all_true.replace();
      end;
    end;
  end;
%mend track_all_true;

/* Accumulate, per all-true combination of 2, 3, 4 and 5 flags, the number */
/* of observations and their total cost; write the result to COUNT_COST.   */
data _null_;
  if 0 then set have; /* prep pdv */

  array x flag:;
  n = dim(x);

  /* log how many subsets of each size will be examined per observation */
  k = 2; ways2 = comb(dim(x),k); put 'NOTE: ' n= k= ways2=;
  k = 3; ways3 = comb(dim(x),k); put 'NOTE: ' n= k= ways3=;
  k = 4; ways4 = comb(dim(x),k); put 'NOTE: ' n= k= ways4=;
  k = 5; ways5 = comb(dim(x),k); put 'NOTE: ' n= k= ways5=;

  /* The array is named COMBO rather than VAR: VAR is also a SAS function  */
  /* name, and an array sharing it draws a log warning. The elements stay  */
  /* var1-var5, so the hash keys and the output columns are unchanged.     */
  array combo(5) $32 var1-var5;
  length count cost_sum 8;

  declare hash all_true(hashexp:15, ordered:'A');
  all_true.defineKey('var1', 'var2', 'var3', 'var4', 'var5');
  all_true.defineData('var1', 'var2', 'var3', 'var4', 'var5', 'count', 'cost_sum');
  all_true.defineDone();

  /* explicit read loop: the whole data set is processed in one step pass */
  do until (end);
    set have end=end;

    %track_all_true(K=2)
    %track_all_true(K=3)
    %track_all_true(K=4)
    %track_all_true(K=5)
  end;

  all_true.output(dataset:'count_cost');
  stop;
run;