SAS:如何通过更改实体名称对数据集进行子集化?

时间:2016-02-04 10:58:51

标签: csv sas

我在SAS中有一个给定的数据集:

Variable  Level    Frequency  Percent
  Var1      B         941      29.46
  Var1      C         820      25.67
  Var2    Unknown     813      25.45
  Var2      E         756      23.67
  Var3      I         930      29.12
  Var3    Unknown     750      23.48

对于每个变量,第一行的级别(Level)是频率最大的值,下一行的级别是第二大的值。例如,对于Var1,级别B是最大值,C是第二大值。我想得到每个变量的最大值;但如果最大值对应的Level为Unknown,则改用第二大值作为最大值。举例来说,最终输出应该是:

Variable  Level   Max_Value  Percent
  Var1      B        941      29.46
  Var2      E        756      23.67
  Var3      I        930      29.12

我认为无论如何都必须先对数据进行子集化,然后使用PROC TRANSPOSE来得到我需要的数据。有人能帮我完成子集化和转置吗?

提前致谢。

1 个答案:

答案 0 :(得分:1)

其实你只需要按频率降序排序并保留first.variable,同时丢弃值为Unknown的级别(原文用前缀比较 eq: 'U' 来识别)。可以让PROC FREQ完成排序,之后只需再用一个步骤进行子集化即可。

/* Build the demo data set EXP from inline CSV and print it.                 */
/*                                                                           */
/* The step deliberately creates a variable whose name contains blanks       */
/* ('arbitrary name 243'n) to show that the later steps cope with arbitrary  */
/* variable names. Such a name literal is only legal when VALIDVARNAME=ANY;  */
/* under the Base SAS default (V7) the step would fail, so set it here.      */
options validvarname=any;
title;                                  /* cancel any titles left over from earlier steps */
data exp;
   /* DSD: comma-delimited input; FIRSTOBS=2 skips the header record */
   infile cards dsd firstobs=2;
   /* var1-var3 are read as character ($, default length 8) */
   input Policy_Number (var1-var3) ($) Exposure;
   /* Extra numeric class variables: collating position of the first letter  */
   /* of var1/var3, and a random 1/2 value (seed 123, P(1)=.4).              */
   'arbitrary name 243'n = rank(first(var1));
   arbitraryname4 = rantbl(123,.4);
   arbitraryname36 = rank(first(var3));
   cards;
Policy_Number,var1,var2,var3,Exposure
1,B,H,J,191
2,B,F,Unknown,174
3,C,Unknown,I,153
4,B,G,L,192
5,Unknown,E,Unknown,184
6,D,E,K,113
7,C,Unknown,I,140
8,A,H,I,133
9,C,F,I,194
10,Unknown,G,Unknown,105
11,B,H,L,172
12,A,Unknown,I,198
13,D,E,K,155
14,Unknown,G,K,177
15,B,H,Unknown,100
16,D,Unknown,J,176
17,B,E,I,112
18,Unknown,E,J,192
19,C,Unknown,K,146
20,C,G,Unknown,187
;;;;
   run;
/* Show the raw data that was just read */
proc print data=exp;
   run;
/* Collect the names of the classification variables: transposing a         */
/* zero-observation copy of EXP (minus the id and weight columns) gives     */
/* one row per remaining variable, with the name in _NAME_.                 */
proc transpose
      data = exp(drop=policy_number exposure obs=0)
      out  = varlist;
   var _all_;
run;

/* Display the resulting list of variable names */
proc print data=varlist;
run;
/* Put the blank-separated variable names into macro variable CLASSVARS.    */
/* NLITERAL() wraps names that are not plain SAS names as 'name'n literals  */
/* so they can be used safely in later statements.                          */
proc sql noprint;
   select nliteral(_name_)
      into :classvars separated by ' '
      from varlist;
quit;

/* Echo the generated list to the log */
%put NOTE: &=classvars;
/* One-way frequencies for every class variable, weighted by EXPOSURE.      */
/* ORDER=FREQ lists the levels of each variable by descending frequency.    */
/* Printed output is suppressed; only the ODS table is captured in FREQS.   */
ods select none;
proc freq data=exp order=freq;
   ods output onewayfreqs=freqs;
   weight exposure;
   tables &classvars / nocum;
run;
ods select all;

/* Reshape the ODS OneWayFreqs table into one tidy layout:                  */
/*   Variable - analysis variable name (TABLE value without its first       */
/*              five characters)                                            */
/*   Levels   - the formatted level, i.e. whichever F_<var> column is       */
/*              populated on the row                                        */
data freqs;
   keep Variable Levels Frequency Percent;
   length Variable $32
          Levels   $64;
   set freqs;
   levels   = coalesceC(of F_:);
   variable = substr(table, 6);
run;
/* Keep, for each variable, the most frequent level that is not "Unknown".  */
/*                                                                          */
/* FREQS is already ordered by descending frequency within each variable    */
/* (PROC FREQ ORDER=FREQ), so after the WHERE filter the first row of each  */
/* BY group is the wanted maximum. NOTSORTED is used because the variables  */
/* appear in table order, not alphabetical order.                           */
data maxsansunk;
   set freqs;
   /* Compare against the full value: the prefix test  ne: 'U'  would also  */
   /* discard legitimate levels that merely start with "U".                 */
   where levels ne 'Unknown';
   by variable notsorted;
   if first.variable;
   rename Frequency=Max_Value;
   run;
/* Print the final one-row-per-variable result */
proc print data=maxsansunk;
   run;

enter image description here