SAS数据集中的累积频率

时间:2014-01-17 04:16:00

标签: sas cumulative-frequency

我的数据集如下所示:

 Customer Sales
        1    15
        2    14
        3    13
        4    11
        5    12
        6    18
        7    21

我需要按销售额占比(%Sales)对客户进行降序排序,然后将他们分配到“高”、“中”、“低”三个分组(桶)中:

 Customer Sales  %Sales
        7    21     20%
        6    18     17%
        1    15     14%
        2    14     13%
        3    13     13%
        5    12     12%
        4    11     11%

分组(桶)需要根据累积频率来划分:

 Customer Sales %Sales CumFreq Bucket
        7    21    20%     20%   High
        6    18    17%     38% Medium
        1    15    14%     52% Medium
        2    14    13%     65% Medium
        3    13    13%     78%    Low
        5    12    12%     89%    Low
        4    11    11%    100%    Low

所以,正如你所看到的,累积销售额占比落在前33%的客户归入“高”组,中间33%的归入“中”组,最后33%的归入“低”组。

2 个答案:

答案 0 :(得分:2)

因此,您需要对列进行求和,对数据集进行排序,然后计算累积百分比。使用自定义格式创建Bucket列。

/* Sample input: one row per customer with that customer's sales figure. */
data sales;
    input  Customer Sales;
    datalines;
        1    15
        2    14
        3    13
        4    11
        5    12
        6    18
        7    21
;
run;

/* Largest sellers first, so the running total below accumulates from the top. */
proc sort data=sales;
    by descending sales;
run;

/* Grand total of sales, stored in macro variable &s.
   best32. keeps full precision when the number is converted to macro text. */
proc sql noprint;
    select sum(sales) format=best32. into :s from sales;
quit;

/* Map a cumulative share (0..1) to a bucket label.
   - The "<-" exclusion makes the shared endpoints explicit: exactly .33 is
     'High' and exactly .67 is 'Medium' (the same assignment the overlapping
     ranges 0-.33 / .33-.67 / .67-1 produced).
   - low/high close the outer ends so a share that lands a hair outside
     [0,1] because of floating-point rounding still gets a label. */
proc format;
    value pctSales
        low  -  .33 = 'High'
        .33 <-  .67 = 'Medium'
        .67 <- high = 'Low';
run;

/* Add the cumulative share and its bucket to every row.
   NOTE: despite its name, pctSales holds the CUMULATIVE share of total
   sales (running total / grand total), not the row's own share. */
data sales;
    set sales;
    format pctSales percent8.2;
    /* Sum statement: implicitly retained, starts at 0, and skips missing
       Sales values instead of turning the running total missing. */
    total + sales;
    pctSales = total / &s;
    bucket = put(pctSales, pctSales.);
    drop total;
run;

答案 1 :(得分:0)

/*untested: don't have access to SAS right now*/

PROC SQL noprint;
/* Grand total of sales, stored in macro variable &TotalSales.
   An explicit best32. format avoids the precision loss of the default
   numeric-to-text conversion used by INTO. */
select sum(sales) format=best32. into :TotalSales
from someCustomertable;

/* One row per customer with:
     salesPerc - the customer's own share of total sales
     cumPerc   - cumulative share, accumulated from the largest seller down
     Bucket    - High / Medium / Low, assigned from the CUMULATIVE share
   PROC SQL has no window functions, so the running total is a correlated
   subquery: it sums every row that sorts at or before the current one.
   Ties on sales are broken by Customer so tied rows get distinct running
   totals (assumes Customer is unique per row - TODO confirm). */
create table topCustomers as
select
a.Customer
, a.sales
, a.sales/&TotalSales as salesPerc format=percent11.2
, (select sum(b.sales)
   from someCustomertable as b
   where b.sales > a.sales
      or (b.sales = a.sales and b.Customer <= a.Customer)
  )/&TotalSales as cumPerc format=percent11.2
, case
    when calculated cumPerc <=1/3 then "High"
    when calculated cumPerc <=2/3 then "Medium"
    else "Low"
  end as Bucket
from someCustomertable as a
order by a.sales desc, a.Customer;
QUIT;