计算分组行之间的最大差异

时间:2015-02-12 09:03:09

标签: sas

我有以下数据,其中每个家庭的成员按年龄(从最年长到最年轻)排序:

/* Sample data: one row per person, with the household id, the person's
   id within the household, and the person's age. */
data houses;
    infile datalines;
    input HouseID PersonID Age;
    datalines;
1 1 25
1 2 20
2 1 32
2 2 16
2 3 14
2 4 12
3 1 44
3 2 42
3 3 10
3 4 5
;
run;

我想为每个家庭计算年龄相邻的成员之间的最大年龄差。因此,在这个例子中,家庭1、2和3的结果依次为5(= 25-20)、16(= 32-16)和32(= 42-10)。

我可以通过大量合并来实现(即先提取成员1,再与提取出的成员2合并,依此类推),但由于一个家庭中可能有20多人,我想找一种更直接的方法。

5 个答案:

答案 0 :(得分:6)

这是一个两遍(two-pass)的解决方案。第一步与上述两种解决方案相同,按年龄排序。第二步中,逐行跟踪max_diff,并在每个HouseID的最后一条记录处输出结果。这样总共只需遍历数据两次。

/* Order each household's members by ascending age so that adjacent rows
   are consecutively aged people. */
proc sort data=houses;
    by houseid age;
run;

/* Produce one row per household holding the largest age gap between
   consecutively aged members. */
data want;
    set houses;
    by houseID;

    /* Carry the running maximum across the rows of a household. */
    retain max_diff 0;

    /* DIF1 is executed on every row so that its queue always holds the
       previous row's age. Because the data is sorted ascending, the raw
       difference (current - previous) is already non-negative; negating it
       would make the "maximum" the negated smallest gap. */
    diff = dif1(age);

    /* The first row of a household has no predecessor in that household:
       discard the cross-household difference and restart the maximum. */
    if first.HouseID then do;
        diff = .;
        max_diff = .;
    end;

    /* A missing max_diff compares lower than any number, so the first real
       gap of the household always replaces it. */
    if diff > max_diff then max_diff = diff;

    /* Emit only the final maximum; it stays missing for a household that
       has a single member. */
    if last.houseID then output;

    keep houseID max_diff;
run;

答案 1 :(得分:2)

/* Order rows by household and person. This relies on PersonID numbering
   the members from oldest to youngest, as in the sample data. */
proc sort data=houses;
    by houseid personid age;
run;

/* Compute the age gap between each person and the previous row. */
data _t1;
    set houses;
    by houseid;

    /* DIF1 returns current - previous; negate it because ages decrease
       down the rows of a household. */
    diff = dif1(age) * (-1);

    /* The first row of a household has no predecessor in that household.
       Use FIRST.houseid rather than a literal PersonID value so the reset
       does not depend on how persons are numbered. */
    if first.houseid then diff = .;
run;

/* Take the largest gap per household; MAX ignores the missing first-row
   values. */
proc sql;
    create table want as
    select houseid, max(diff) as Max_Diff
    from _t1
    group by houseid;
quit; /* end PROC SQL explicitly so it does not stay active */

答案 2 :(得分:2)

/* Sort the source data (houses) by household, oldest member first, and
   write the result to a separate dataset so the input is left untouched. */
proc sort data = houses out = house;
    by houseid descending age;
run;

/* Add the age gap between each person and the next-older person in the
   same household. */
data house;
    set house;
    by houseid;

    /* LAG1 is executed on every row so that its queue always holds the
       previous row's age. */
    lag_age = lag1(age);

    /* The first row of a household has no predecessor in that household,
       so its gap is 0. The ELSE is required: without it the assignment
       below would overwrite the 0 with a difference against the last
       member of the previous household. */
    if first.houseid then age_diff = 0;
    else age_diff = lag_age - age;
run;

/* Report the largest gap per household. */
proc sql;
    select houseid, max(age_diff) as max_age_diff
    from house
    group by houseid;
quit;

工作原理:

首先按houseid和降序的Age对数据集进行排序。其次,数据步计算PDV中当前年龄值与前一个年龄值之间的差异。然后,使用PROC SQL,我们可以获得每个houseid的最大年龄差异。

答案 3 :(得分:2)

再补充一个方案。这是Reeza回答的精简版本。

/* Sorting on HouseID and Age is sufficient; PersonID plays no part in
   the calculation. */
proc sort data = houses;
    by HouseID Age;
run;

/* One row per household: the largest absolute age gap between rows that
   are adjacent after the sort. */
data want;
    set houses;
    by HouseID;

    /* diff carries the running maximum from one row to the next. */
    retain diff;

    /* DIF runs on every row so its queue always holds the previous age. */
    gap = abs(dif(Age));

    /* Start each household at 0; otherwise keep the larger of the running
       maximum and the current gap. */
    if first.HouseID then diff = 0;
    else diff = max(diff, gap);

    /* Write out only the last row of each household. */
    if last.HouseID then output;

    keep HouseID diff;
run;

答案 4 :(得分:0)

以下是使用FIRST.和LAST.、(排序后)只需遍历数据一次的示例。

/* Sample data: one row per person, with the household id, the person's
   id within the household, and the person's age. */
data houses;
    infile datalines;
    input HouseID PersonID Age;
    datalines;
1 1 25
1 2 20
2 1 32
2 2 16
2 3 14
2 4 12
3 1 44
3 2 42
3 3 10
3 4 5
;
run;

/* Order each household from its oldest to its youngest member. */
proc sort data=HOUSES;
    by houseid descending age;
run;

/* One row per household holding the largest age gap between members that
   are adjacent after the sort. */
data WANT(keep=houseid max_diff);
    /* Fix the column order of the output dataset. */
    format houseid max_diff;
    /* Carry the running maximum and the ages being compared across rows. */
    retain max_diff age1 age2;
    set HOUSES;
    by houseid descending age;

    if first.houseid then do;
        /* A single-member household has no pair to compare: report 0. */
        if last.houseid then max_diff = 0;
        else do;
            /* Start a new household: clear the carried values and
               remember the oldest member's age. */
            call missing(max_diff, age1, age2);
            age1 = age;
        end;
    end;
    else do;
        /* Compare this member with the previous (older) one. */
        age2 = age;
        temp = age1 - age2;
        if temp > max_diff then max_diff = temp;
        /* Only rows that have a successor become the next reference. */
        if not last.houseid then age1 = age;
    end;

    /* Write the household's result on its final row. */
    if last.houseid then output;
run;