我使用以下代码将 Excel 表格导入 SAS:
/* Point a fileref at the workbook. No TERMSTR= option is given: an .xlsx
   file is a binary workbook, so a text record terminator does not apply. */
filename reffile "/folders/myfolders/subji.xlsx";

/* Read the workbook into WORK.ds, taking variable names from the first row
   (GETNAMES=YES). REPLACE allows the step to be re-run in the same session;
   without it PROC IMPORT cancels when WORK.ds already exists and the later
   steps keep using the stale copy. */
proc import datafile=reffile
    dbms=xlsx
    out=ds
    replace;
    getnames=yes;
run;
然后使用以下 PROC SORT 对数据排序,以便对其进行重复测量分析(repeated measures analysis):
/* Sort WORK.ds in place by the three key variables so that later steps
   can use BY-group processing on the same keys. */
proc sort data=ds;
    by subject Color_Compatibility sameloc;
run;
然后,使用以下代码进行单变量分析(PROC UNIVARIATE),以获取统计量和效应:
/* Compute the mean of resprt for every subject x Color_Compatibility x sameloc
   BY group and write one row per group to WORK.unids1. NOPRINT suppresses the
   printed report, so only the output data set is produced. */
proc univariate data=ds noprint;
    /* Only rows that satisfy all four conditions enter the calculation. */
    where respAC = 1
      and outlier = 0
      and practice = 0
      and Color_Compatibility > 0;
    by subject Color_Compatibility sameloc;
    var resprt;
    /* The output variable holding the group mean keeps the name resprt. */
    output out=unids1 mean=resprt;
run;
outlier(异常值)列目前是在 Excel 中计算的,但我注意到 Excel 的 STDEV 函数给出的值不准确。因此,我想在 SAS 中创建一个异常值变量,然后把所有异常值行从分析中剔除(以均值 ± 2.5 个标准差作为界限)。请问该怎么做?谢谢。
答案 0 :(得分:1)
下面是一种使用 PROC SQL 一步识别异常值的方法。您可以直接在 SQL 中计算聚合统计量,不过日志中会出现一条关于"重新合并"(remerging)汇总统计量的 NOTE 提示。关键是要确保 GROUP BY 变量正是您希望计算统计量的分组层级。在这个例子中,我按车辆的气缸数(CYLINDERS)分组,在 SASHELP.CARS 数据集中查找 MPG_CITY 指标的异常值。
/* Identify outliers: label each car 'Outlier' when its MPG_CITY lies more
   than 2.5 standard deviations from the mean of its CYLINDERS group, and
   'Normal' otherwise. The group std and avg are kept as columns; SAS remerges
   them onto the detail rows and writes a NOTE about remerging to the log. */
proc sql;
    create table outliers as
    select *,
           std(mpg_city)  as std,
           mean(mpg_city) as avg,
           case
               /* Guards come first. SAS orders a missing numeric value below
                  every number, so an unguarded "z-score < -2.5" test is true
                  whenever the z-score is missing. That would flag rows with a
                  missing MPG_CITY and every row of a group whose std is
                  missing or zero (single-row or constant groups). Comparing
                  the absolute deviation with 2.5 * std also avoids dividing
                  by a zero std. */
               when not missing(mpg_city)
                    and calculated std > 0
                    and abs(mpg_city - calculated avg) > 2.5 * calculated std
                   then 'Outlier'
               else 'Normal'
           end as outlier_status
    from sashelp.cars
    group by cylinders;
quit;
/* Count how many rows of WORK.outliers carry each outlier_status value. */
proc freq data=outliers;
    tables outlier_status;
run;
/* List only the rows labelled 'Outlier', showing the identifying columns
   together with the std and avg columns stored in WORK.outliers. */
proc print data=outliers;
    var origin make model cylinders mpg_city std avg;
    where outlier_status = 'Outlier';
run;