我想通过导入多个Excel文件在SAS中创建一个数据集。这些Excel文件包含相同的变量,保存在同一目录中,并且文件名的格式一致,例如:
excel_20150101 excel_20150201
我该如何用SAS编写?
答案 0 :(得分:1)
此宏将遍历目录中的所有Excel文件,并将内容分别导入名为DS1、DS2、DS3……DS400(如果有400个Excel文件)的数据集中。请确保该目录中仅保留Excel(.xlsx)文件。
/* Turn on macro diagnostics in the log (MERROR, MLOGIC, MPRINT, SYMBOLGEN) and enable SPOOL. */
options merror mlogic mprint symbolgen spool;
%macro drive(dir,ext);
  /*
   * Imports the Excel workbooks found in a directory, one data set per file.
   *
   * dir - directory to scan (Windows-style path; members are addressed
   *       as <dir>\<member>).
   * ext - file extension to accept, compared case-insensitively against
   *       the text after the last dot of each member name. When left
   *       blank, every member is imported.
   *
   * Member number i of the directory listing is imported into DS<i>, so
   * skipped (non-matching) members leave gaps in the numbering.
   * PROC IMPORT always runs with DBMS=XLSX.
   */
  %local filrf rc did name i wanted;

  /* Assign a fileref to the directory and open it */
  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));

  /* DOPEN returns 0 on failure; stop before DNUM/DREAD are called on a bad id */
  %if &did eq 0 %then %do;
    %put ERROR: Directory &dir cannot be opened or does not exist.;
    %let rc=%sysfunc(filename(filrf));
    %return;
  %end;

  /* Walk every member of the directory */
  %do i = 1 %to %sysfunc(dnum(&did));
    %let name=%qsysfunc(dread(&did,&i));

    /* Decide whether this member has the requested extension */
    %let wanted=0;
    %if %length(&ext) = 0 %then %let wanted=1;
    %else %if %qupcase(%qscan(&name,-1,.)) = %qupcase(&ext) %then %let wanted=1;

    %if &wanted %then %do;
      /* Build the path from the dir parameter instead of a fixed folder */
      proc import
        out=DS&i
        datafile="&dir\&name"
        dbms=XLSX replace;
        getnames=yes;
      run;
    %end;
  %end;

  /* Close the directory and clear the fileref */
  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend drive;
/* First argument: the directory where the Excel files are stored. */
/* Second argument: the file extension you are looking for.        */
%drive(Y:\Excel,xlsx);
答案 1 :(得分:0)
这是创建所有文件名及其路径的数据集的方法
%let windowpath=/data/exports/Analytics/Users/psamson/Travel_project/PCA/;

/*
 * Builds data set FILE: one row per .xlsx member of the directory named
 * by the windowpath macro variable.
 *   file - member name as returned by DREAD
 *   name - first 25 characters of the member name
 * Only members that can be opened with FOPEN are kept.
 */
data file;
  length file $200 name $28;

  /* Point a fileref at the directory and open it */
  rc=filename("myfile","&windowpath");
  DirId=dopen("myfile");

  if DirId = 0 then
    put "ERROR: cannot open directory &windowpath";
  else do;
    memcount=dnum(DirId);
    do i = 1 to memcount;
      /* Read the member name once and reuse it below */
      file = strip(dread(DirId,i));
      /* The "./" separator keeps the path valid with or without a trailing slash in windowpath */
      fdir = "&windowpath./" || file;
      rc2=filename("fdir",fdir);
      fDirId=fopen("fdir");
      if fDirId > 0 then do;
        name = substr(file,1,25);
        /* Keep only names that END in .xlsx, in any letter case */
        if prxmatch('/\.xlsx\s*$/i', file) then output;
        /* Close only handles that were actually opened */
        rc2 = fclose(fDirId);
      end;
      rc2= filename("fdir", "");
    end;
    rc = dclose(DirId);
  end;

  rc = filename("myfile");
  keep file name ; /* the two variables used to validate and drive the import */
run;
然后遍历这张文件表,像上面的宏那样对其中每个文件调用导入即可!
答案 2 :(得分:0)
这个问题会比较棘手。从Excel导入数据时,变量类型是自动确定的,您几乎无法控制。因此,我有99%的把握您在合并数据时会遇到问题,因为各文件中的变量类型无法对齐。我建议先将所有文件转换为CSV(通过批处理脚本),然后一次导入所有CSV。
这是我写的可全部导入的宏:
/*
 * list_files(dir, ext)
 *
 * Appends one row to data set LIST for every file under DIR whose
 * extension (text after the last dot) equals EXT, compared
 * case-insensitively. Each row holds:
 *   dir      - directory the file was found in
 *   name     - file name including extension
 *   path     - dir and name joined with a backslash
 *   the_name - name up to (not including) the first dot
 *
 * Any member whose name has no second dot-delimited token (i.e. no
 * extension) is treated as a subdirectory and scanned recursively.
 * LIST is only appended to, never cleared, by this macro.
 */
%macro list_files(dir,ext);
  /* file_name is declared local so recursive calls do not share or leak it */
  %local filrf rc did name i file_name;

  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));

  %if &did eq 0 %then
    %do;
      %put Directory &dir cannot be open or does not exist;
      /* Release the fileref that was assigned above before leaving */
      %let rc=%sysfunc(filename(filrf));
      %return;
    %end;

  %do i = 1 %to %sysfunc(dnum(&did));
    %let name=%qsysfunc(dread(&did,&i));

    %if %qupcase(%qscan(&name,-1,.)) = %upcase(&ext) %then
      %do;
        %put &dir\&name;
        %let file_name = %qscan(&name,1,.);
        %put &file_name;

        /* One-row staging table; SYMGET reads the macro values at run time */
        data _tmp;
          length dir $512 name $100;
          dir=symget("dir");
          name=symget("name");
          path = catx('\',dir,name);
          the_name = substr(name,1,find(name,'.')-1);
        run;

        proc append base=list data=_tmp force;
        run;

        proc sql;
          drop table _tmp;
        quit;
      %end;
    %else %if %qscan(&name,2,.) = %then
      %do;
        /* No extension: assume a subdirectory and descend into it */
        %list_files(&dir\&name,&ext)
      %end;
  %end;

  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend list_files;
/*
 * import_file(path, file_name, dataset_name)
 *
 * Runs PROC IMPORT (DBMS=XLSX, REPLACE) on the single workbook
 * <path>\<file_name> and writes the result to <dataset_name>.
 */
%macro import_file(path, file_name, dataset_name );
  proc import out=&dataset_name
              datafile="&path.\&file_name."
              dbms=xlsx
              replace;
  run;
%mend import_file;
/* Start from an empty LIST so that re-running this script in the same
   session does not append duplicate rows; NOWARN keeps the first run quiet. */
proc datasets lib=work nolist nowarn;
  delete list;
quit;

/* Create the list of files, in this case all XLSX files */
%list_files(c:\_localData\temp, xlsx);

/* Call %import_file once for each row of LIST; outputs are test01, test02, ...
   %nrstr defers the macro call until after this DATA step has finished,
   instead of executing the macro while CALL EXECUTE is still running. */
data _null_;
  set list;
  string = catt('%nrstr(%import_file(', dir, ', ', name,', ', catt('test', put(_n_, z2.)), '));');
  call execute (string);
run;
但是,我建议您将所有文件转换为CSV,然后再导入,而不是使用此方法。 https://gist.github.com/statgeek/878e585102c14e01581f55dbe972d27e
然后将所有CSV一次导入到一个文件中: https://blogs.sas.com/content/sasdummy/2018/10/09/read-multiple-text-files/