我有两个数据集。它们的研究对象来自同一个总体,但两个“样本”是由不同的研究人员分别抽取的。因此:
1. 我无法识别两个样本之间相互匹配的个体(例如同时出现在两个样本中的 Peter Smith);
2. 两个数据集中变量的名称不同,但它们的含义相同。
现在我想比较这两个数据集,看看它们在整体上有多相似。我的想法是:对于那些我已知含义相同的变量,比较它们的频数输出。
在我看来,我目前的解决方案相当繁琐,因为必须对每一对变量重复执行所有步骤。
有没有更优雅/更高效的解决方案?
我还想比较数值变量的均值、中位数和百分位数。
非常感谢你!
Gerit
以下是我目前解决方案的示例。我给两个数据集中的观测起了不同的名字,因为在我的原始文件中,无法根据 ID 变量判断两个数据集中的哪些观测是相互对应的。
/* Sample 1: one observation per subject.                               */
/* Name is a character key (default length 8); road, means and goal are */
/* numeric variables read in list style from the in-stream records.     */
data have1;
  length Name $ 8;
  infile datalines;
  input Name road means goal;
datalines;
adam 1 3 0
bob 1 1 1
clint 2 2 0
dean 3 1 1
eric 2 1 0
flint 1 2 1
gerald 3 1 1
;
run;
/* Sample 2: drawn by a different researcher, so the subjects and the   */
/* variable names differ from sample 1.                                 */
/* Name is a character key (default length 8); street, finish, other    */
/* and purpose are numeric, read in list style from in-stream records.  */
data have2;
  length Name $ 8;
  infile datalines;
  input Name street finish other purpose;
datalines;
harry 2 1 0 3
idefix 1 0 0 2
john 3 1 1 2
kelvin 1 0 2 2
liam 2 1 2 1
max 3 1 2 1
nero 2 0 1 3
ovid 3 0 2 3
;
run;
/*----------------------------------------------------------------------
  compare_freq: compare the frequency distribution of one variable in a
  first data set with the same-meaning variable in a second data set.

  Parameters (all keyword, defaults reproduce the original hard-coded run):
    ds1,  var1 - first data set and its variable
    ds2,  var2 - second data set and its variable (may be named differently)
    out1, out2 - PROC FREQ output data sets for ds1 / ds2
    out        - merged comparison data set, one row per level of var1, with
                 count1 count2 diffct pct1 pct2 diffpct

  A level that occurs in only one of the two data sets is reported with a
  count and percent of 0 on the other side, so the differences are never
  missing just because a level is absent.
----------------------------------------------------------------------*/
%macro compare_freq(ds1=have1, var1=road, ds2=have2, var2=street,
                    out1=fhave1road, out2=fhave2street, out=comb);
  %local rename_var;

  /* Both frequency tables must share one BY variable for the merge, so the
     second variable is renamed to the first one's name. The rename is
     skipped when the two names already agree. */
  %if %upcase(&var1) ne %upcase(&var2) %then %let rename_var = &var2 = &var1;

  proc freq data=&ds1;
    tables &var1 / out=&out1 (rename=(percent=pct1 count=count1));
  run;

  proc freq data=&ds2;
    tables &var2 / out=&out2 (rename=(&rename_var percent=pct2 count=count2));
  run;

  /* PROC FREQ writes its OUT= data set ordered by the table variable, so
     the two tables can be merged without an extra sort. */
  data &out;
    merge &out1 &out2;
    by &var1;
    /* A level missing from one table simply did not occur there. */
    count1 = coalesce(count1, 0);
    count2 = coalesce(count2, 0);
    pct1   = coalesce(pct1, 0);
    pct2   = coalesce(pct2, 0);
    diffpct = pct1 - pct2;
    diffct  = count1 - count2;
  run;

  proc print data=&out;
    var &var1 count1 count2 diffct pct1 pct2 diffpct;
  run;
%mend compare_freq;

/* Same comparison as before; call the macro again for every other pair of
   variables that share a meaning (e.g. var1=goal, var2=finish). */
%compare_freq(ds1=have1, var1=road, ds2=have2, var2=street,
              out1=fhave1road, out2=fhave2street, out=comb)
/*----------------------------------------------------------------------
  stat_rows: summarise one numeric variable and return the statistics as
  rows (one row per statistic, named in _STAT_), sorted by _STAT_.

  An OUTPUT statement without statistic keywords only delivers N, MIN, MAX,
  MEAN and STD. The statistics are therefore requested explicitly, which
  adds the median and percentiles, and then transposed back into the
  one-row-per-statistic layout.

    ds  - input data set
    var - numeric analysis variable
    out - output data set with _STAT_ and a column named after var
----------------------------------------------------------------------*/
%macro stat_rows(ds=, var=, out=);
  proc means data=&ds;
    var &var;
    output out=_stat_wide (drop=_type_ _freq_)
           n=N min=MIN max=MAX mean=MEAN std=STD
           median=MEDIAN p5=P5 p25=P25 p75=P75 p95=P95;
  run;

  /* Turn the single wide row into one row per statistic. */
  proc transpose data=_stat_wide name=_STAT_
                 out=&out (rename=(col1=&var));
  run;

  proc sort data=&out;
    by _STAT_;
  run;
%mend stat_rows;

/*----------------------------------------------------------------------
  compare_means: compare descriptive statistics of one numeric variable in
  a first data set with the same-meaning variable in a second data set.

  Parameters (all keyword, defaults reproduce the original hard-coded run):
    ds1,  var1 - first data set and its variable
    ds2,  var2 - second data set and its variable; the name must differ
                 from var1 because both become columns of the result
    out1, out2 - per-data-set statistics (one row per _STAT_)
    out        - merged comparison: _STAT_, var1, var2, diff = var1 - var2
----------------------------------------------------------------------*/
%macro compare_means(ds1=have1, var1=road, ds2=have2, var2=street,
                     out1=mhave1road, out2=mhave2street, out=mcomb);
  %if %upcase(&var1) = %upcase(&var2) %then %do;
    %put ERROR: compare_means needs two different variable names (got &var1 twice).;
    %return;
  %end;

  %stat_rows(ds=&ds1, var=&var1, out=&out1)
  %stat_rows(ds=&ds2, var=&var2, out=&out2)

  data &out (keep=_STAT_ &var1 &var2 diff);
    merge &out1 &out2;
    by _STAT_;
    diff = &var1 - &var2;
  run;

  proc print data=&out;
  run;

  /* Remove the scratch data set used by stat_rows. */
  proc datasets library=work nolist;
    delete _stat_wide;
  quit;
%mend compare_means;

/* Same comparison as before, now including median and percentiles; call
   the macro again for every other pair of same-meaning numeric variables. */
%compare_means(ds1=have1, var1=road, ds2=have2, var2=street,
               out1=mhave1road, out2=mhave2street, out=mcomb)
答案 0 :(得分:0)
您可以从下面的代码中获得相当多的信息:
title 'have1';
/* Reshape sample 1 to long form: one row per subject and variable, with  */
/* the variable name in _NAME_ and its value in COL1.                     */
/* NOTSORTED lets the BY groups be taken in stored order, so the step     */
/* does not fail when the data set is not sorted by Name.                 */
proc transpose data=have1 out=trans1;
  by Name notsorted;
  var road means goal;
run;

title 'have2';
/* Same reshaping for sample 2. */
proc transpose data=have2 out=trans2;
  by Name notsorted;
  var street finish other purpose;
run;

title 'both';
/* Stack both long data sets and flag which sample each row came from:    */
/* source = 1 for rows of trans1, source = 2 for rows of trans2.          */
data both;
  set trans1 (in=from_first) trans2;
  source = ifn(from_first, 1, 2);
run;

/* One panel per sample, one box per original variable. */
proc sgpanel data=both;
  panelby source;
  vbox col1 / category=_name_;
run;

/* Clear the global title so it does not leak into later output. */
title;
或者使用箱线图得到等效的图形化比较:
ast