我在SAS中有一个给定的数据集:
Variable Level Frequency Percent
Var1 B 941 29.46
Var1 C 820 25.67
Var2 Unknown 813 25.45
Var2 E 756 23.67
Var3 I 930 29.12
Var3 Unknown 750 23.48
对于每个变量,第一行的级别是最大值,下一行的级别是第二大值。例如对于Var1,级别B是最大值,C是第二大值。我想得到每个变量的最大值;但如果最大值对应的Level为Unknown,则改用第二大值作为最大值。举例来说,最终输出应该是:
Variable Level Max_Value Percent
Var1 B 941 29.46
Var2 E 756 23.67
Var3 I 930 29.12
我想我需要先对数据取子集,然后使用PROC TRANSPOSE来得到我需要的数据。
有人能帮我完成取子集和转置吗?
提前致谢。
答案 0 :(得分:1)
你其实只需要按频数降序排序并保留first.variable,同时丢弃以'U'开头的Level(即Unknown)。可以让PROC FREQ完成排序,之后只需再做一步子集筛选即可。
/* Clear any titles left over from earlier steps. */
title;

/* The DATA step below creates 'arbitrary name 243'n, a name literal that
   contains blanks. SAS rejects such names unless VALIDVARNAME=ANY is in
   effect, so set it explicitly instead of relying on the session default. */
options validvarname=any;

/* Build the demo data set EXP from the inline comma-separated records.
   DSD reads the commas as delimiters; FIRSTOBS=2 skips the header record. */
data exp;
   infile cards dsd firstobs=2;
   input Policy_Number (var1-var3) ($) Exposure;
   /* Three extra numeric variables with arbitrary names. They are not
      dropped later, so they are tabulated along with var1-var3
      (presumably to show the later steps cope with any variable name). */
   /* Collating-sequence position of the first character of var1. */
   'arbitrary name 243'n = rank(first(var1));
   /* Draw from a tabled distribution with seed 123 and first probability .4. */
   arbitraryname4 = rantbl(123,.4);
   /* Collating-sequence position of the first character of var3. */
   arbitraryname36 = rank(first(var3));
   cards;
Policy_Number,var1,var2,var3,Exposure
1,B,H,J,191
2,B,F,Unknown,174
3,C,Unknown,I,153
4,B,G,L,192
5,Unknown,E,Unknown,184
6,D,E,K,113
7,C,Unknown,I,140
8,A,H,I,133
9,C,F,I,194
10,Unknown,G,Unknown,105
11,B,H,L,172
12,A,Unknown,I,198
13,D,E,K,155
14,Unknown,G,K,177
15,B,H,Unknown,100
16,D,Unknown,J,176
17,B,E,I,112
18,Unknown,E,J,192
19,C,Unknown,K,146
20,C,G,Unknown,187
;;;;
   run;

/* List the data set just created (PROC PRINT defaults to the last one). */
proc print;
run;
/* Collect the names of the variables to tabulate. Transposing zero
   observations of EXP, with the policy identifier and the weight variable
   dropped, gives one row per remaining variable with its name in _NAME_. */
proc transpose
      data = exp(drop=Policy_Number Exposure obs=0)
      out  = varlist;
   var _all_;
run;

/* Show the resulting list of variable names. */
proc print data=varlist;
run;
/* Store the variable names from VARLIST in the macro variable CLASSVARS
   as a blank-separated list. NLITERAL() writes any name that is not a
   standard SAS name as a name literal ('...'n). */
proc sql noprint;
   select nliteral(_name_)
      into :classvars separated by ' '
      from varlist;
quit;

/* Echo the generated list to the log. */
%put NOTE: &=classvars;
/* Suppress printed output; only the ODS OUTPUT data set is needed. */
ods select none;

/* One-way frequency table for every variable named in &classvars,
   weighted by Exposure. ORDER=FREQ lists the levels of each variable by
   descending frequency; NOCUM omits the cumulative columns. The
   OneWayFreqs tables are captured in the data set FREQS. */
proc freq data=exp order=freq;
   weight exposure;
   tables &classvars / nocum;
   ods output onewayfreqs=freqs;
run;

/* Restore normal ODS output selection. */
ods select all;
/* Reduce the ODS OneWayFreqs output to one tidy row per variable level.
   Variable - name of the tabulated variable, taken from the TABLE column
   Levels   - level value, taken from the first non-missing F_* column
   Frequency and Percent are kept unchanged. */
data freqs(keep=Variable Levels Frequency Percent);
   length Variable $32 Levels $64;
   set freqs;
   /* TABLE reads "Table <name>". Position 6 is the blank between the
      prefix and the name, so LEFT() is needed to remove that leading
      blank from Variable. */
   variable = left(substr(table,6));
   /* Each row fills only the F_* column of its own variable. */
   levels = coalesceC(of F_:);
run;
/* For each variable keep the most frequent level that is not "Unknown".
   Assumes FREQS lists the levels of each variable in descending frequency
   order (as PROC FREQ ORDER=FREQ produces) - verify if FREQS is built
   differently. */
data maxsansunk;
   set freqs;
   /* Exclude only the level "Unknown" (any case, leading blanks ignored).
      A prefix test such as ne: 'U' would also discard genuine levels that
      merely start with "U". WHERE is applied before BY-group processing,
      so FIRST.variable marks the first remaining row of each variable. */
   where upcase(left(levels)) ne 'UNKNOWN';
   /* NOTSORTED: rows are grouped by variable but not in sorted order. */
   by variable notsorted;
   if first.variable;
   rename Frequency=Max_Value;
run;

/* Print the final result (PROC PRINT defaults to the last data set). */
proc print;
run;