按不完全匹配的日期连接两个表

时间:2016-09-06 21:30:10

标签: sql date sas

使用SAS:如果两个表中的日期并不完全相同,如何按日期连接这两个表?例如,我想在full_table中添加一列'type',其值取自changepoints表,并按日期智能匹配(每一行取不晚于其日期的最近一个变更点的type)。

ods listing;

/**********************************************************
Main table: one row per id, each carrying an observation date.
***********************************************************/
data full_table;
    /* Declare attributes up front (id first, to keep the column order),
       then read both fields with plain list input. */
    length   id $ 8;
    informat date date9.;
    format   date date9.;
    input id date;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;

/* Name the data set explicitly rather than relying on _LAST_. */
proc print data=full_table;
run;

/**********************************************************
Additional information: the dates at which the type changes.
***********************************************************/
data changepoints;
    /* date is declared before type so the column order stays date, type. */
    informat date date9.;
    length   type $ 8;
    format   date date9.;
    input date type;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;

/* Name the data set explicitly rather than relying on _LAST_. */
proc print data=changepoints;
run;
/**********************************************************
Desired result, typed in by hand: full_table plus the type
taken from the latest changepoint on or before each date.
***********************************************************/
data new_table;
    /* Attributes are declared in id, date, type order to keep the column order. */
    length   id $ 8;
    informat date date9.;
    length   type $ 8;
    format   date date9.;
    input id date type;
    datalines;
a 01APR2015 spiral
b 02APR2015 spiral
c 03APR2015 spiral
d 01JUN2015 spiral
e 24JUN2015 circle
f 01DEC2015 square
;
run;

/* Name the data set explicitly rather than relying on _LAST_. */
proc print data=new_table;
run;

/**********************************************************
Naive attempt that does NOT give the desired result: an
equi-join on date. None of the full_table dates equals a
changepoint date, so type is missing on every row.
***********************************************************/
proc sql;
    create table new_table2 as
    select ft.*, cp.type
    from full_table as ft
    left join changepoints as cp
        on ft.date = cp.date;
quit;

所需的输出是:

                              Obs    id         date     type
                               1     a     01APR2015    spiral
                               2     b     02APR2015    spiral
                               3     c     03APR2015    spiral
                               4     d     01JUN2015    spiral
                               5     e     24JUN2015    circle
                               6     f     01DEC2015    square

以下是根据下方正确答案整理出的解决方案:

ods listing;

/**********************************************************
Main table: one row per id, each carrying an observation date.
***********************************************************/
data full_table;
    /* Declare attributes up front (id first, to keep the column order),
       then read both fields with plain list input. */
    length   id $ 8;
    informat date date9.;
    format   date date9.;
    input id date;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;

/* Name the data set explicitly rather than relying on _LAST_. */
proc print data=full_table;
run;

/**********************************************************
Additional information: the dates at which the type changes.
***********************************************************/
data changepoints;
    /* date is declared before type so the column order stays date, type. */
    informat date date9.;
    length   type $ 8;
    format   date date9.;
    input date type;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;

/* Name the data set explicitly rather than relying on _LAST_. */
proc print data=changepoints;
run;

/**********************************************************
Turn every changepoint into a half-open interval [start, end)
so that a range join can pick the type in force on a date.
  start = the changepoint's own date
  end   = the date of the next changepoint
***********************************************************/

/* Read the changepoints newest-first so that LAG() hands each row
   the date of the changepoint that follows it in time. */
proc sort data=changepoints;
    by descending date;
run;

data changepoints;
    set changepoints;
    /* LAG() is called unconditionally on every row so its queue stays in step. */
    end   = lag(date);
    start = date;
    /* The newest changepoint (first row in this order) has no successor.
       Leave its interval open-ended with a far-future date: closing it at
       today() would drop any row dated on or after the run date from every
       interval and make the result depend on when the program is run. */
    if _n_ = 1 then end = '31DEC9999'd;
    format start end date9.;
run;

/* Back to chronological order for printing and joining. */
proc sort data=changepoints;
    by date;
run;

proc print data=changepoints;
run;

/**********************************************************
Range join: give every full_table row the type of the
changepoint interval [start, end) that contains its date.
Rows that fall in no interval keep a missing type (left join).
***********************************************************/
proc sql noprint;
    create table test as
    select
        a.id,
        a.date,
        b.type
    from full_table as a
    left join changepoints as b
        on  a.date >= b.start
        and a.date <  b.end
    /* A PROC SQL join promises no particular row order, so sort
       explicitly to keep the printed result stable. */
    order by a.date, a.id;
quit;

proc print data=test;
run;

2 个答案:

答案 0 :(得分:0)

对我来说,首选的方法是使用PROC FORMAT,只需很少的工作即可很好地处理范围:

/* Main table: one row per id, each carrying an observation date. */
data full_table;
    /* Declare attributes up front (id first, to keep the column order),
       then read both fields with plain list input. */
    length   id $ 8;
    informat date date9.;
    format   date date9.;
    input id date;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;


/* Changepoints: the dates at which the type changes. */
data changepoints;
    /* date is declared before type so the column order stays date, type. */
    informat date date9.;
    length   type $ 8;
    format   date date9.;
    input date type;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;


/* Build the CNTLIN control data set for PROC FORMAT. Each changepoint
   yields one range [previous changepoint, this changepoint) labelled
   with the previous type; the last changepoint also yields an
   open-ended range labelled with its own type.
   Assumes changepoints is in ascending date order, as in the DATALINES
   of this example - sort it first if that is not guaranteed. */
data for_fmt;
  set changepoints end=eof;
  length prev_type $6 hlo $1;
  retain prev_date .
         prev_type ' ';   /* values carried over from the previous record */

  fmtname = 'CHANGEF';
  eexcl   = 'Y';          /* end is excluded, so on 05JUN2015 exactly it is already circle */

  label = prev_type;
  start = prev_date;
  end   = date;
  /* The first range has no lower bound. Mark it LOW instead of using a
     hard-coded start date, so dates before the first changepoint get a
     blank label rather than falling outside the format, where PUT would
     return the raw numeric date as text. */
  if _n_ = 1 then hlo = 'L';
  else hlo = ' ';
  output;

  if eof then do;
    /* The last changepoint stays in force from its date onwards: HIGH range. */
    label = type;
    start = date;
    end   = .;
    hlo   = 'H';
    output;
  end;

  prev_date = date;
  prev_type = type;
  drop type;   /* TYPE has its own meaning in a CNTLIN data set, so it must not be passed through */
run;


/* Create the CHANGEF format from the control data set built above. */
proc format cntlin=for_fmt;
run;

/* Look up the type for each date through the CHANGEF format -
   no join or sort of full_table is needed. */
data want;
  set full_table;
  length type $ 6;               /* same width the 6-wide PUT result would give */
  type = put(date, changef6.);
run;

答案 1 :(得分:0)

尝试把changepoints表改成日期区间(起始日期/结束日期)的形式,如下所示:

/* Changepoints stored as explicit date ranges: Start is the first day a
   type applies, End is the start of the next range. */
data changepoints;
    /* The two dates sit back to back in columns 1-9 and 10-18, so read
       them with explicit column pointers; type follows as list input. */
    input @1  Start date9.
          @10 End   date9.
          type $;
    format Start End date9.;
    datalines;
15MAR201405JUN2015  spiral
05JUN201529NOV2015  circle
29NOV201501JAN2016  square
;
run;

然后您可以使用简单的SQL连接,如下所示:

/* Range join: give every full_table row the type of the changepoint
   interval [start, end) that contains its date. Rows that fall in no
   interval keep a missing type (left join). */
proc sql noprint;
    create table test as
    select
        a.id,
        a.date,
        b.type
    from full_table as a
    left join changepoints as b
        on  a.date >= b.start
        and a.date <  b.end
    /* A PROC SQL join promises no particular row order, so sort
       explicitly to keep the result stable. */
    order by a.date, a.id;
quit;