我正在尝试创建一个数据集,用来显示有多少消费者单位参与了四次季度访谈中的至少三次。
我不确定用 proc freq 统计每个 CU_ID 出现的次数是否可行。以下是我目前的代码:
/* Copy each of the four hw06.fmli07x files into WORK and stamp every row
   with the quarter number (QTR) matching the file it came from. */
data fmli071Q1;
    set hw06.fmli071;
    QTR = 1;
run;

data fmli072Q2;
    set hw06.fmli072;
    QTR = 2;
run;

data fmli073Q3;
    set hw06.fmli073;
    QTR = 3;
run;

data fmli074Q4;
    set hw06.fmli074;
    QTR = 4;
run;

/* Stack the four quarter-tagged copies, in quarter order, into one dataset. */
data fmli2007_bob2;
    set fmli071Q1
        fmli072Q2
        fmli073Q3
        fmli074Q4;
run;

/* List the variables and attributes of the stacked dataset. */
proc contents data=fmli2007_bob2;
run;
/* Copy each of the four hw06.memi07x files into WORK and stamp every row
   with the quarter number (QTR) matching the file it came from. */
data memi071Q1;
    set hw06.memi071;
    QTR = 1;
run;

data memi072Q2;
    set hw06.memi072;
    QTR = 2;
run;

data memi073Q3;
    set hw06.memi073;
    QTR = 3;
run;

data memi074Q4;
    set hw06.memi074;
    QTR = 4;
run;

/* Stack the four quarter-tagged copies, in quarter order, into one dataset. */
data memi2007_bob2;
    set memi071Q1
        memi072Q2
        memi073Q3
        memi074Q4;
run;
/* Sort both stacked datasets in place by CU_ID so that they can be
   processed with a BY CU_ID statement afterwards. */
proc sort data=fmli2007_bob2;
    by CU_ID;
run;

proc sort data=memi2007_bob2;
    by CU_ID;
run;
/* Match-merge the stacked fmli and memi datasets on CU_ID.
   Both inputs must already be sorted by CU_ID.

   NOTE(review): both inputs carry a QTR variable and may hold several rows
   per CU_ID. A BY-merge pairs rows positionally inside each CU_ID group and
   the right-hand dataset's QTR overwrites the left-hand one. Confirm whether
   the intended key is really CU_ID alone or CU_ID plus QTR. */
data ce2007_bob2;
    /* The MERGE statement needs its own terminating semicolon; without it
       SAS reads "by" and "CU_ID" as two more dataset names and the step fails. */
    merge fmli2007_bob2 memi2007_bob2;
    by CU_ID;
run;
现在,我需要统计在 2007 年的四次季度访谈中至少参与了三次的消费者单位有多少个。我应该如何通过 proc freq 或其他方法做到这一点?我想创建一个名为 atleast_three_bob2 的新数据集,其中只包含参与了 2007 年四次季度访谈中至少三次的消费者单位的 ID,最后再把它打印出来。
/* Build atleast_three_bob2 with one row per consumer unit (CU_ID) that
   appears in at least three distinct quarters (QTR) of ce2007_bob2, then
   print it. The original step copied ce2007_bob2 unchanged and filtered
   nothing.

   quarters_interviewed holds the number of distinct QTR values seen for the
   CU_ID (3 or 4 when all four quarters are present). The number of
   qualifying consumer units is the row count that PROC SQL writes to the
   log when the table is created.

   NOTE(review): this relies on QTR in ce2007_bob2 reflecting every quarter
   in which the CU_ID was interviewed; confirm against how ce2007_bob2 is
   merged. */
proc sql;
    create table atleast_three_bob2 as
    select CU_ID,
           count(distinct QTR) as quarters_interviewed
    from ce2007_bob2
    group by CU_ID
    having count(distinct QTR) >= 3
    order by CU_ID
    ;
quit;

/* Show the qualifying consumer-unit IDs. */
proc print data=atleast_three_bob2;
run;
答案 0 :(得分:0)
Proc SQL
可以根据复杂的标准执行数据聚合和频率计数。
/* Generate the demo dataset HAVE with variables quarter (1-4) and id (1-100).
   A (quarter, id) row is written when any one of these holds:
     - id is a multiple of 7  (quarter > 0 is true for every quarter here),
     - id is a multiple of 13 and quarter is 2 or later,
     - quarter is 3 or later (every id).
   At most one row is written per (quarter, id) pair. */
data have;
    do quarter = 1 to 4;
        do id = 1 to 100;
            if (quarter > 0 and mod(id, 7) = 0)
               or (quarter > 1 and mod(id, 13) = 0)
               or (quarter > 2)
            then output;
        end;
    end;
run;
/* NOTE(review): no QUIT; statement is visible in this excerpt - confirm that
   one follows the last query. */
proc sql;
    /* WANT: every (quarter, id) row of HAVE whose id occurs in three or more
       distinct quarters, ordered by id and then quarter. */
    create table want(label="ids and quarters of ids appearing in >= 3 quarters") as
    select h.quarter, h.id
    from have as h
    where h.id in
        (
            select id
            from have
            group by id
            having count(distinct quarter) >= 3
        )
    order by h.id, h.quarter
    ;

    /* WANT2: for each per-id row count found in WANT (quarter_count), the
       number of ids that have that count (id_count). */
    create table want2(label="frequency counts for number of ids having >= 3 quarters") as
    select per_id.quarter_count, count(*) as id_count
    from
        (
            select count(*) as quarter_count
            from want
            group by id
        ) as per_id
    group by per_id.quarter_count
    ;