使用SAS:如果日期并不完全匹配,如何按日期连接两个表?例如,我想在full_table中添加一列type,其值取自changepoints表,并按日期智能匹配合并。
ods listing;

/* ---------------------------------------------------------
   Main table: one row per id together with its date.
   --------------------------------------------------------- */
data full_table;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input id $ date :date9.;
    format date date9.;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;

proc print data=full_table;
run;
/* ---------------------------------------------------------
   Additional information: the date on which each type
   is listed as a change point.
   --------------------------------------------------------- */
data changepoints;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input date :date9. type $;
    format date date9.;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;

proc print data=changepoints;
run;
/* ---------------------------------------------------------
   Desired result, typed in by hand: every full_table row
   carries the type of the most recent change point.
   --------------------------------------------------------- */
data new_table;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input id $ date :date9. type $;
    format date date9.;
    datalines;
a 01APR2015 spiral
b 02APR2015 spiral
c 03APR2015 spiral
d 01JUN2015 spiral
e 24JUN2015 circle
f 01DEC2015 square
;
run;

proc print data=new_table;
run;
/* ---------------------------------------------------------
   Attempted join that does NOT give the desired result:
   the equality condition only matches a row whose date is
   exactly a change-point date, and none of the sample dates
   in full_table coincide with one.
   --------------------------------------------------------- */
proc sql;
    create table new_table2 as
    select f.*,
           c.type
    from full_table as f
    left join changepoints as c
        on f.date = c.date;
quit;
所需的输出是:
Obs id date type
1 a 01APR2015 spiral
2 b 02APR2015 spiral
3 c 03APR2015 spiral
4 d 01JUN2015 spiral
5 e 24JUN2015 circle
6 f 01DEC2015 square
以下是根据正确答案整理出的完整解决方案:
ods listing;

/* ---------------------------------------------------------
   Main table: one row per id together with its date.
   --------------------------------------------------------- */
data full_table;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input id $ date :date9.;
    format date date9.;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;

proc print data=full_table;
run;
/* ---------------------------------------------------------
   Additional information: the date on which each type
   is listed as a change point.
   --------------------------------------------------------- */
data changepoints;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input date :date9. type $;
    format date date9.;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;

proc print data=changepoints;
run;
/**********************************************************
Give every change point a start and an end date so that a
range join (start <= date < end) can pick the change point
in effect on any given date.
***********************************************************/

/* Newest change point first, so that LAG() returns the date
   of the next (later) change point. */
proc sort data=changepoints;
    by descending date;
run;

data changepoints;
    set changepoints;
    end   = lag(date);   /* date of the following change point */
    start = date;
    /* The newest change point has no successor. Leave its range
       open-ended with a far-future sentinel instead of today():
       rows dated today or later still match, and rerunning the
       program on another day gives the same table. */
    if _n_ = 1 then end = '31DEC9999'd;
    format start end date9.;
run;

/* Back to chronological order. */
proc sort data=changepoints;
    by date;
run;

proc print data=changepoints;
run;
/**********************************************************
Range join: keep every full_table row and attach the type of
the change point whose start/end range contains its date.
***********************************************************/
proc sql noprint;
    create table test as
    select ft.id,
           ft.date,
           cp.type
    from full_table as ft
    left join changepoints as cp
        on  cp.start <= ft.date
        and ft.date < cp.end;
quit;

proc print data=test;
run;
答案 0 :(得分:0)
对我来说,首选的方法是使用PROC FORMAT,它只需很少的工作就能很好地处理范围:
/* Main table: one row per id together with its date. */
data full_table;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input id $ date :date9.;
    format date date9.;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;
/* Change points: the date listed for each type. */
data changepoints;
    /* the colon modifier reads the next blank-delimited token with date9. */
    input date :date9. type $;
    format date date9.;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;
/* Build a CNTLIN control data set for PROC FORMAT that maps date
   ranges to the type in effect during that range.
   Each input row closes the range that ends at its date (labelled
   with the previous row's type); the last row additionally opens a
   range that runs to HIGH.  The first range starts at LOW instead of
   a hard-coded date, so dates earlier than the first change point
   get a blank label rather than falling outside the format and being
   written out as a raw date number.
   Assumes changepoints is sorted by ascending date. */
data for_fmt;
    set changepoints end=eof;
    length label prev_type $6 hlo $1;
    retain prev_date .
           prev_type ' ';      /* values carried over from the previous row */

    fmtname = 'CHANGEF';
    eexcl   = 'Y';             /* exclude the end value: on 05JUN2015 exactly
                                  the result is already circle */

    /* range that ends just before this change point */
    label = prev_type;
    start = prev_date;
    end   = date;
    if _n_ = 1 then hlo = 'L'; /* first range is open at the low end */
    else hlo = ' ';
    output;

    /* after the last change point: open-ended range up to HIGH */
    if eof then do;
        label = type;
        start = date;
        end   = .;
        hlo   = 'H';
        output;
    end;

    prev_date = date;
    prev_type = type;

    drop type;                 /* TYPE has its own meaning in a CNTLIN data set */
run;
/* Create the CHANGEF format from the control data set. */
proc format cntlin=for_fmt;
run;

/* Look up the type for each row by formatting its date. */
data want;
    set full_table;
    type = put(date, CHANGEF6.);
run;
答案 1 :(得分:0)
尝试将changepoints表改为日期范围,如下所示:
/* Change points stored as Start/End date ranges.  The two dates sit
   back to back in columns 1-9 and 10-18, so each one is read with an
   explicit column pointer. */
data changepoints;
    input @1 Start date9. @10 End date9. type $;
    format Start End date9.;
    datalines;
15MAR201405JUN2015 spiral
05JUN201529NOV2015 circle
29NOV201501JAN2016 square
;
run;
然后您可以使用简单的SQL连接,如下所示:
/* Range join: keep every full_table row and attach the type of the
   change point whose start/end range contains its date. */
proc sql noprint;
    create table test as
    select ft.id,
           ft.date,
           cp.type
    from full_table as ft
    left join changepoints as cp
        on  cp.start <= ft.date
        and ft.date < cp.end;
quit;