我有一个名为 table1 的数据集,如下所示:
Obs ID M_201812 M_201901 M_201902 M_201903
1 X1 1 . . .
2 X2 1 1 . .
3 X3 . 1 1 .
4 X4 . 1 . .
5 X5 . 1 . .
6 X6 1 . . .
7 X7 1 1 . .
8 X8 1 1 . .
9 X9 . . 1 .
10 X10 1 1 . .
这里的每一列代表一个月份,这些列是由之前运行的一些宏动态生成的,因此月份的数量和取值并不固定。我需要计算最近3个月、最近6个月和最近12个月的总和。我想到的方法如下: A)将列名称存储在宏变量中:
/* Collect the column names of table1 (everything except ID) from
   DICTIONARY.COLUMNS into the macro variable COLS2 as one comma-separated
   string. DISTINCT also returns the names in sorted order. */
proc sql noprint;
select distinct name
into :cols2 separated by ','
from dictionary.columns
/* NOTE(review): the left side is upper-cased while the literal 'Table1' is
   mixed case, so as written this comparison looks like it can never be true;
   the literal presumably needs to be 'TABLE1' - confirm.
   NOTE(review): there is no LIBNAME condition, so a member with this name in
   any assigned library would contribute its columns. */
where upcase(memname) = 'Table1' and name not in ('ID');
;
quit;
/* NOTE(review): the %put statement below has no terminating semicolon. */
%put &cols2.
输出如下:
M_201812,M_201901,M_201902,M_201903
B)之后根据变量中的项目数创建总和:
/* Intended to add 3-, 6- and 12-month totals to table1, choosing how many of
   the trailing columns listed in &cols2 to add up based on how many columns
   exist.

   NOTE(review): count("&cols2",",") counts the commas, i.e. the number of
   names minus one, so the "<=3" branch also covers a list of four names.

   NOTE(review): the IF/ELSE tests run at DATA step execution time, but every
   %scan() below is resolved by the macro processor before the step is even
   compiled, in ALL branches. When &cols2 holds fewer names than a branch
   asks for (e.g. -5 with four names), %scan returns an empty string and the
   generated SUM() call contains empty arguments between commas, which is what
   raises "ERROR 159-185: Null parameters for SUM are invalid".

   NOTE(review): 3m_total, 6m_total and 12m_total start with a digit, which is
   not a valid SAS variable name under the default naming rules. */
data table1;
set table1;
if count("&cols2",",") <=3 then do;
3m_total=sum(of &cols2);
6m_total=sum(of &cols2);
12m_total=sum(of &cols2);
end;
else if 3< count("&cols2",",") <=6 then do;
3m_total=sum(%scan(%superQ(cols2),-1,%str(,)),%scan(%superQ(cols2),-2,%str(,)),%scan(%superQ(cols2),-3,%str(,)));
6m_total=sum(of &cols2);
12m_total=sum(of &cols2);
end;
else if 6< count("&cols2",",") <=12 then do;
3m_total=sum(%scan(%superQ(cols2),-1,%str(,)),%scan(%superQ(cols2),-2,%str(,)),%scan(%superQ(cols2),-3,%str(,)));
6m_total=sum(%scan(%superQ(cols2),-1,%str(,)),%scan(%superQ(cols2),-2,%str(,)),%scan(%superQ(cols2),-3,%str(,)),%scan(%superQ(cols2),-4,%str(,)),%scan(%superQ(cols2),-5,%str(,)),%scan(%superQ(cols2),-6,%str(,)));
12m_total=sum(of &cols2);
/* NOTE(review): the DO group opened by the preceding "else if" is not closed
   with an END before the "else do" that follows. */
else do;
3m_total=sum(%scan(%superQ(cols2),-1,%str(,)),%scan(%superQ(cols2),-2,%str(,)),%scan(%superQ(cols2),-3,%str(,)));
6m_total=sum(%scan(%superQ(cols2),-1,%str(,)),%scan(%superQ(cols2),-2,%str(,)),%scan(%superQ(cols2),-3,%str(,)),%scan(%superQ(cols2),-4,%str(,)),%scan(%superQ(cols2),-5,%str(,)),%scan(%superQ(cols2),-6,%str(,)));
12m_total=sum(%scan(%superQ(cols2),-1,%str(,)),%scan(%superQ(cols2),-2,%str(,)),%scan(%superQ(cols2),-3,%str(,)),%scan(%superQ(cols2),-4,%str(,)),%scan(%superQ(cols2),-5,%str(,)),%scan(%superQ(cols2),-6,%str(,)),
%scan(%superQ(cols2),-7,%str(,)),%scan(%superQ(cols2),-8,%str(,)),%scan(%superQ(cols2),-9,%str(,)),%scan(%superQ(cols2),-10,%str(,)),%scan(%superQ(cols2),-11,%str(,)),%scan(%superQ(cols2),-12,%str(,)));
end;
run;
基本上,仅当有12个月的可用列时,我们才获得12个月的总和。如果只有3个月可用,则3个月总和= 6个月总和= 12个月总和。运行代码后,出现以下错误:
ERROR 159-185: Null parameters for SUM are invalid.
这个错误出现在最后的 else do 语句上。我怎么也想不明白,为什么 SAS 无法解析这样一个简单的 if-then-do-else 语句。是 if 条件写错了,还是调用宏变量的方式有问题?如能提供任何帮助,我将不胜感激。非常感谢。
答案 0 :(得分:1)
很难知道宏逻辑实际生成了什么代码。 但最可能的原因是,您生成了两个相邻的逗号,而它们之间没有任何内容,就像这样:
475 data test;
476 y=sum(1,,2,3);
-
159
ERROR 159-185: Null parameters for SUM are invalid.
477 run;
您需要要么不生成多余的逗号,要么在逗号之间放入一些内容,例如表示缺失值的句点(.)或 0。
通常,在 SAS 中不使用逗号会更简单。因此,请获取以空格分隔的变量名称列表。
/*
 * Build space-delimited lists of the most recent month columns of Table1 and
 * store them in the macro variables COLS_3, COLS_6, COLS_9 and COLS_12
 * (the latest 3, 6, 9 and 12 columns respectively).
 *
 * The lists are derived from each column's rank counted from the newest
 * column, not from fixed VARNUM thresholds, so they stay correct for any
 * number of month columns and regardless of where ID sits in the table.
 * When fewer than N month columns exist, the N-month list simply holds all
 * of them, so the 3/6/9/12 lists collapse to the same set.
 *
 * Assumes the month columns are named so that alphabetical order equals
 * chronological order (e.g. M_YYYYMM) - confirm against the generating macro.
 */
proc contents data=Table1 noprint
  out=_contents (where=(upcase(name) ne 'ID'))  /* match ID in any letter case */
;
run;

/* Newest month first, so the observation number is the column's recency rank. */
proc sort data=_contents;
  by descending name;
run;

data _null_;
  length cols_3 cols_6 cols_9 cols_12 $32767;
  retain cols_3 cols_6 cols_9 cols_12;
  set _contents end=is_last;

  /* Prepending keeps every list in chronological order; CATX skips the
     still-empty list on the first pass, so no stray delimiters appear. */
  if _n_ <= 3  then cols_3  = catx(' ', name, cols_3);
  if _n_ <= 6  then cols_6  = catx(' ', name, cols_6);
  if _n_ <= 9  then cols_9  = catx(' ', name, cols_9);
  if _n_ <= 12 then cols_12 = catx(' ', name, cols_12);

  if is_last then do;
    call symputx('cols_3',  cols_3);
    call symputx('cols_6',  cols_6);
    call symputx('cols_9',  cols_9);
    call symputx('cols_12', cols_12);
  end;
run;
然后通过 of 关键字使用该名称列表,这样就不需要在变量名之间加逗号了。
/* Add the rolling totals to every row of table1. Each macro variable holds a
   space-delimited variable list, consumed through the OF keyword; the literal
   0 makes a total come out as 0 instead of missing when every listed column
   is missing on a row. */
data want;
  set table1;
  total_3m = sum(of &cols_3, 0);
  total_6m = sum(of &cols_6, 0);
  total_9m = sum(of &cols_9, 0);
  total    = sum(of &cols_12, 0);
run;
还请记住对变量使用有效的名称。变量名称不能以数字开头。
答案 1 :(得分:0)
请考虑使用 proc transpose 将数据从宽格式重塑为长格式,然后用多个相关(correlated)聚合 SQL 子查询计算 3/6/12 个月的滚动总和。这样即可得到您所说的预期结果:
我需要做的是计算最近3个月,最近6个月和最近12个月的总和。
数据
/* Sample data in wide layout: one row per ID, one numeric column per month
   (M_YYYYMM). Values are read from the comma-delimited in-stream lines below;
   a period is a missing value. */
data Month_Data;
  length ID $ 3;
  infile datalines dsd dlm=',';
  input ID $ M_201812 M_201901 M_201902 M_201903;
  datalines;
X1, 1, ., ., .
X2, 1, 1, ., .
X3, ., 1, 1, .
X4, ., 1, ., .
X5, ., 1, ., .
X6, 1, ., ., .
X7, 1, 1, ., .
X8, 1, 1, ., .
X9, ., ., 1, .
X10, 1, 1, ., .
;
重塑
/* BY-group processing in the transpose below needs the rows ordered by ID. */
proc sort data=Month_Data;
  by ID;
run;

/* Wide -> long: within each ID the month columns are turned into rows. The
   source column name and its value arrive in _NAME_ and COL1, the two
   columns the following step renames. */
proc transpose data=Month_Data out=Month_Data_Long;
  by ID;
run;
/*
 * Give the long table readable column names and derive a real SAS date for
 * every month:
 *   Month_Year - original column name, e.g. M_201812
 *   value      - the transposed cell value
 *   MMYY       - yyyymmdd text for the first day of that month, e.g. 20181201
 *   Month_Date - MMYY converted to a SAS date (shown with DATE9.)
 */
data Month_Data_Long;
  set Month_Data_Long (rename=(_NAME_ = Month_Year col1=value));
  length MMYY $ 9;
  format Month_Date date9.;
  label Month_Year = "Month Year Original";

  /* TRANWRD substitutes a single blank when the replacement is "", and the
     || operator does not trim, so plain concatenation leaves a leading blank
     and puts "01" after any padding in Month_Year. CATS strips both pieces
     before joining, so the result is always the bare 8-digit string. */
  MMYY = cats(tranwrd(Month_Year, "M_", ""), "01");
  Month_Date = input(MMYY, yymmdd8.);
run;
汇总
/*
 * Run_Sums: one row per ID and month, with the month's own value plus three
 * trailing sums computed by correlated subqueries over the same ID.
 *
 * Each subquery must aggregate the rows it selects (sub.Value). Summing
 * m.Value instead would just multiply the outer row's value by the number of
 * matching rows, and would return missing whenever the current month is
 * missing. SUM skips missing values on its own, so no extra filter on
 * sub.Value is needed.
 *
 * NOTE(review): each window starts at intnx('month', ..., -N) and ends at the
 * current month inclusive, so it spans N+1 calendar months (4, 7 and 13).
 * If "three months" is meant to include the current month, the offsets
 * should be -2, -5 and -11 - confirm the intended definition.
 */
proc sql;
  create table Run_Sums as
  select m.ID,
         m.Month_Date,
         m.Value,
         (select sum(sub.Value)
            from Month_Data_Long as sub
           where sub.ID = m.ID
             and sub.Month_Date >= intnx('month', m.Month_Date, -3)
             and sub.Month_Date <= m.Month_Date
         ) as ThreeMonthsSum,
         (select sum(sub.Value)
            from Month_Data_Long as sub
           where sub.ID = m.ID
             and sub.Month_Date >= intnx('month', m.Month_Date, -6)
             and sub.Month_Date <= m.Month_Date
         ) as SixMonthsSum,
         (select sum(sub.Value)
            from Month_Data_Long as sub
           where sub.ID = m.ID
             and sub.Month_Date >= intnx('month', m.Month_Date, -12)
             and sub.Month_Date <= m.Month_Date
         ) as TwelveMonthsSum
  from Month_Data_Long as m;
quit;
输出(基于 OP 发布的数据,三个总和之间没有差异)