需要您的协助和指导。请看下面
*rsubmit;
/* Copy the distinct nm values from the Teradata table DWH_table into
   mylib.DWH_table using explicit SQL pass-through. The rsubmit/endrsubmit
   wrappers are written as comment statements (leading asterisk). */
proc sql;
    connect to teradata (user=&user_id. password=&user_pwd.);

    create table mylib.DWH_table as
        select *
        from connection to teradata (
            select distinct nm
            from DWH_table
        );
quit;
*endrsubmit;
*rsubmit;
/* Keep only the rows of mylib.DWH_table whose nm contains "studio"
   (the /i modifier makes the match case-insensitive). */
data mylib.out_sas1;
    set mylib.DWH_table;
    if not (prxmatch("m/studio/i", nm) > 0) then delete;
run;
*endrsubmit;
因此,以上代码检查nm列中是否包含单词“studio”并返回结果。但是,这是一个手动过程,需要自动化。我有另一个数据集,其中仅包含一个名为“KEYWORDS”的列。下面是一些示例数据:
KEYWORDS:
apple
mango
banana
grapes
目标是SAS应该将列中的单词与数据库中的值进行比较,并创建一个单独的输出表。 例如:
*rsubmit;
/* Manual example for one keyword: rows whose nm contains "apple",
   matched case-insensitively. */
data mylib.out_sas2;
    set mylib.DWH_table;
    if not (prxmatch("m/apple/i", nm) > 0) then delete;
run;
*endrsubmit;
*rsubmit;
/* Manual example for one keyword: rows whose nm contains "mango",
   matched case-insensitively. */
data mylib.out_sas3;
    set mylib.DWH_table;
    if not (prxmatch("m/mango/i", nm) > 0) then delete;
run;
*endrsubmit;
这可以在SAS中完成吗?
答案 0 :(得分:1)
将关键字放入宏变量中
/* Load the distinct keywords into macro variables:
     no_keys            - number of distinct keywords
     key_1 ... key_<n>  - the keyword values themselves */
proc sql;
    /* TRIMMED removes the leading blanks that INTO otherwise keeps on a
       numeric result; without it &no_keys would resolve to something like
       "       4" and break the key_&no_keys name below (needs SAS 9.3+). */
    select count(distinct KEYWORDS)
        into :no_keys trimmed
        from mylib.MY_KEYWORDS;

    /* Both ends of a macro-variable range take a leading colon. */
    select distinct KEYWORDS
        into :key_1 - :key_&no_keys
        from mylib.MY_KEYWORDS;
quit;
现在使用这些宏变量
/*
 * find_keywords
 * Reads mylib.DWH_table once and writes one output table per keyword:
 * mylib.out_sas1 ... mylib.out_sas<no_keys>. A row is written to table k
 * when its nm value contains keyword k (case-insensitive regex match).
 *
 * Expects these macro variables to exist before the call:
 *   no_keys            - number of keywords
 *   key_1 ... key_<n>  - the keyword values
 *
 * Keywords are inserted into the regex as-is, so regex metacharacters or
 * a "/" inside a keyword will change or break the pattern.
 */
%macro find_keywords;
    %local key_nr;

    data
        %do key_nr = 1 %to &no_keys;
            mylib.out_sas&key_nr (drop = UP_nm)
        %end;
        ;
        set mylib.DWH_table;

        /* Declared up front so the length is not fixed by the first keyword
           literal; 200 is an arbitrary upper bound - adjust to the KEYWORDS
           column length if needed. */
        length keyword $ 200;

        UP_nm = upcase(nm);

        %do key_nr = 1 %to &no_keys;
            /* &&key_&key_nr resolves in two passes: first to &key_1, &key_2, ...
               and then to the keyword text itself. */
            keyword = "&&key_&key_nr";
            if prxmatch("m/&&key_&key_nr/i", UP_nm) > 0 then output mylib.out_sas&key_nr;
        %end;
    run;
%mend;
%find_keywords;
您需要将其嵌入宏中,因为在开放代码(open code)中无法使用 %do ... %end;。&& 会先解析为 &,因此 &&key_&key_nr 中的第一个 & 被延迟解析:先解析 &key_nr,然后再解析得到的 &key_1、&key_2 等。
答案 1 :(得分:0)
可以考虑在数据步中使用 CALL EXECUTE 来逐行调用宏:
/*
 * subset_data(key)
 *   key - the keyword as a double-quoted string, e.g. "apple".
 * Creates mylib.out_<keyword> holding the rows of mylib.DWH_table whose nm
 * matches the keyword case-insensitively. The double quotes are stripped
 * from key to build the output table name.
 */
%macro subset_data(key);
    %let name_unquoted = %qsysfunc(compress(&key., %str(%")));

    data mylib.out_&name_unquoted.;
        set mylib.DWH_table;
        if not (prxmatch("m/" || trim(&key.) || "/i", nm) > 0) then delete;
    run;
%mend;
/* For every row of mydata, queue one %subset_data("<keyword>") call.
   %nrstr defers macro execution until after this step has finished. */
data _null_;
    set mydata;
    call execute(
        '%nrstr(%subset_data("' || KEYWORDS || '"))'
    );
run;
或者,不使用 call execute,而是先生成一个包含各个宏调用的 SAS 脚本文件,然后用 %include 运行它:
/* Write one %subset_data("<keyword>") call per row of mydata into the
   script file Temp.sas ... */
data _null_;
    set mydata;
    file "Temp.sas";
    put '%subset_data("' KEYWORDS '") ;';
run;

/* ... and then run the generated script. */
%include "Temp.sas";
但是,如果关键字很多(即数十、数百乃至数千个),请参考上面 @Richard 的评论:借助一个临时数据集 temp,把每个关键字的匹配结果追加到同一个带有关键字标识列的输出表中,而不是为每个关键字单独创建一个表:
/*
 * subset_data(key)
 *   key - the keyword as a double-quoted string, e.g. "apple".
 * Appends the rows of mylib.DWH_table whose nm matches the keyword
 * (case-insensitive) to the single table mylib.subset_data, tagging each
 * row with the keyword that matched.
 */
%macro subset_data(key);
    /* Build temp with the keyword indicator column. */
    data temp;
        set mylib.DWH_table;
        if prxmatch("m/" || trim(&key.) || "/i", nm) > 0;

        /* Fixed length so every call produces the same column definition;
           otherwise the length comes from the first keyword literal and
           longer keywords are truncated when appended. 200 is an arbitrary
           upper bound. */
        length keyword $ 200;
        keyword = &key.;
    run;

    /* PROC APPEND creates the base table on the first call when it does
       not exist yet (a SET of a missing table would fail) and avoids
       re-reading the accumulated rows on every call. */
    proc append base=mylib.subset_data data=temp;
    run;
%mend;
可复制示例 (使用sashelp.class数据集)
/* Show the column layout of the sample table used below. */
proc contents data=sashelp.class;
run;
/*
 * subset_data(key)
 *   key - the keyword as a double-quoted string, e.g. "w".
 * Creates a table named after the keyword (quotes stripped) holding the
 * rows of sashelp.class whose Name matches the keyword case-insensitively.
 */
%macro subset_data(key);
    %let name_unquoted = %qsysfunc(compress(&key.,%str(%")));

    data &name_unquoted.;
        set sashelp.class;
        if not (prxmatch("m/" || trim(&key.) || "/i", Name) > 0) then delete;
    run;
%mend;
/* Three sample keywords, each with a numeric id, read with list input. */
data keywords;
    input id keyword $;
    cards;
1 w
2 u
3 y
;
/* Queue one %subset_data("<keyword>") call per row of keywords; %nrstr
   defers macro execution until after this step has finished. */
data _null_;
    set keywords;
    call execute(
        '%nrstr(%subset_data("' || keyword || '"))'
    );
run;
proc sql 版本
/*
 * subset_data(key)
 *   key - the keyword as a double-quoted string, e.g. "apple".
 * PROC SQL variant: creates a table named after the keyword (quotes
 * stripped) holding the rows of mylib.DWH_table whose nm contains it.
 *
 * NOTE(review): LIKE is case-sensitive, unlike the prxmatch /i versions of
 * this macro; wrap both sides in upcase() if case-insensitive matching is
 * required.
 */
%macro subset_data(key);
    %let name_unquoted = %qsysfunc(compress(&key., %str(%")));

    proc sql;
        create table &name_unquoted. as
            select *
            from mylib.DWH_table
            where nm like "%" || trim(&key.) || "%";
        /* SAS has no "--" comment syntax, so the alternative filter is kept
           in a block comment:  where index(nm, trim(key)) > 0  */
    quit;
%mend;
proc sql 版本(输出数据集依次命名为 sas1、sas2、……)
/* Rewrite keywords in place, adding dname: the text "sas" followed by the
   row number, used later as the output table name for that keyword. */
data keywords;
    set keywords;
    dname = cat("", "sas", _n_);
run;
/*
 * subset_data(key, dname)
 *   key   - the keyword as a double-quoted string, e.g. "apple".
 *   dname - the output table name as a double-quoted string, e.g. "sas1".
 * Creates the table named by dname (quotes stripped) holding the rows of
 * mylib.DWH_table whose nm contains the keyword.
 *
 * NOTE(review): LIKE is case-sensitive, unlike the prxmatch /i versions of
 * this macro; wrap both sides in upcase() if case-insensitive matching is
 * required.
 */
%macro subset_data(key, dname);
    %let name_unquoted = %qsysfunc(compress(&dname.,%str(%")));

    proc sql;
        create table &name_unquoted. as
            select *
            from mylib.DWH_table
            where nm like "%" || trim(&key.) || "%";
        /* SAS has no "--" comment syntax, so the alternative filter is kept
           in a block comment:  where index(nm, trim(key)) > 0  */
    quit;
%mend;
/* Queue one %subset_data("<keyword>", "<dname>") call per row of
   keywords; %nrstr defers macro execution until after this step. */
data _null_;
    set keywords;
    call execute(
        '%nrstr(%subset_data("' || keyword || '", "' || dname || '"))'
    );
run;
答案 2 :(得分:0)
一个想法是执行交叉联接(cross join),并用匹配条件进行过滤。结果是一张表,每个“名称–名词”匹配对应一行。
示例数据和代码:
/* Sample names, one per input line. line_len receives the length of each
   line and $varying. reads that many characters, so embedded blanks are
   kept as part of the name. */
data names;
    length name $80;
    infile datalines length=line_len;
    input name $varying. line_len;
    cards;
Bob
Bob's Burgers
Angel
Angle iron city
Chad
Chadwicks town council
Dutch
Edward
run;
/* Sample nouns (search terms), one per input line, read the same way as
   the names: line_len holds the line length used by $varying. */
data nouns;
    length noun $10;
    infile datalines length=line_len;
    input noun $varying. line_len;
    cards;
chad
own
ward
burger
run;
/*
 * Optionally pre-lowercase the data being matched, so the join does not
 * have to call lowcase() for every name/noun pair:
data lower_names;
  set names;
  lower_name = lowcase(name);
run;
data lower_nouns;
  set nouns;
  lower_noun = lowcase(noun);
run;
*/
/* Pair every name with every noun and keep the pairs where the noun
   occurs inside the name, compared in lower case. */
proc sql;
    create table want as
        select nm_tbl.name,
               kw_tbl.noun
        from names as nm_tbl
        cross join nouns as kw_tbl
        /* index() returns the 1-based position of the match, 0 when absent */
        where index(lowcase(nm_tbl.name), lowcase(trim(kw_tbl.noun))) > 0
    ;
quit;
不管您采用哪种方法,计算量都会很大。假设要对照所有名称检查100个名词,即 2600万个名称 × 100个名词 = 26亿次匹配计算。通常,性能最强、可用资源最多的系统能最快给出结果。
案例1:SAS 环境的性能更好
案例2:Teradata 环境的性能更好
案例1的代码:
/* Case 1: download the distinct names from Teradata, then do the
   name/noun matching on the SAS side. */
proc sql;
    /* CONNECT TO needs the DBMS name. */
    connect to teradata (user=&user_id. password=&user_pwd.);

    /* Download names. */
    create table mylib.DWH_names as
        select *
        from connection to teradata (
            select distinct nm from DWH_table
        );

    disconnect from teradata;

    /* Pair every name with every noun and keep the pairs where the noun
       occurs inside the name, compared in lower case. */
    create table work.NameNounMatches as
        select
            NAMES.nm,
            NOUNS.noun
        from
            mylib.dwh_names as NAMES
        cross join
            mylib.nouns as NOUNS
        where
            index(lowcase(NAMES.nm), lowcase(trim(NOUNS.noun))) >= 1
    ;
quit;
案例2的代码:
使用 Teradata 临时表——上传方法参考 Tom 在 https://communities.sas.com/t5/SAS-Enterprise-Guide/SAS-Access-to-Teradata-How-to-create-Temporary-tables-in/td-p/228852 上的回答(使用 connection=global):
/* Teradata libref for temporary tables. connection=global is set so the
   pass-through step that connects with "connect using tdwork" can share
   this connection (presumably required to see the temporary table -
   confirm against the SAS/ACCESS documentation). */
libname tdwork teradata
    username=&username
    password=&password
    server=&server
    connection=global
    dbmstemp=yes
;

/* Upload the nouns to Teradata through that libref. */
data tdwork.NOUNS_UPLOADED;
    set mylib.nouns;
run;
/* Case 2: run the cross join inside Teradata via pass-through and bring
   back only the matching name/noun pairs.
   - The select list uses the aliases that the FROM clause defines.
   - Teradata POSITION takes "substring IN string", so the noun comes first.
   - TRIM drops trailing blanks on the uploaded noun; LOWER mirrors the
     lowcase() comparison used on the SAS side.
   - The source table is DWH_table, the name used by the other queries
     against this database.
   No comments are placed inside the parentheses because that text is sent
   to Teradata as-is. */
proc sql;
    connect using tdwork;

    create table work.NameNounMatches as
        select *
        from connection to tdwork (
            select NAMES_LIST.nm, NOUNS_LIST.noun
            from DWH_table as NAMES_LIST
            cross join NOUNS_UPLOADED as NOUNS_LIST
            where POSITION(LOWER(TRIM(NOUNS_LIST.noun)) IN LOWER(NAMES_LIST.nm)) >= 1
        );

    disconnect from tdwork;
quit;