我正在处理一个包含多只股票的数据集,并已将它与一个事件数据集合并(每只股票在样本期间内有若干个事件)。
现在,为了做事件研究,我想创建几个作为虚拟变量的窗口标志:公告日前第60天到前第11天(-60到-11)、前第5天到前第1天(-5到-1),以及公告日当天和其后1天(0到+1)。有两点很重要:要区分不同的股票,并且要考虑被其他事件“污染”的窗口(spoiled windows)。
我尝试了下面的代码,但它只给出了一个窗口,既没有区分不同的股票,也没有考虑被污染的窗口:
proc sql;
/* Pair every daily row of Dataset_full with each announcement of the same
   security (secid) and keep the pairs whose day offset, event_time, lies
   between -60 and 11 inclusive. */
create view event_study as
select distinct
       b.ann_date,
       a.date,
       a.dayid - b.dayid as event_time,
       a.stock,
       a.return
from Dataset_full as a
inner join Announcements as b
   on a.secid = b.secid
where a.dayid - b.dayid between -60 and 11
/* three separate sort keys; the comma between a.stock and b.ann_date is required */
order by a.stock, b.ann_date, event_time;
quit;
一些信息:公告日即事件日。dataset_full 包含股票、日期、收益率和成交量,每个日历日/交易日一行。Announcements 包含股票、公告日期和公告信息,每条公告一行。期望得到的数据如下所示:
Stock Date Ann_date flag_minus60_minus11 flag_minus5_minus1 flag_day0_day1
A 1/01/2016 1
A 2/01/2016 1
A 3/01/2016
A 4/01/2016 4/01/2016 1
A 5/01/2016 1
A 6/01/2016
A 7/01/2016
A 8/01/2016
A 9/01/2016
A 10/01/2016
A 11/01/2016
A 12/01/2016
A 13/01/2016
A 14/01/2016
A 15/01/2016
B 1/01/2016 1
B 2/01/2016 1
B 3/01/2016 1
B 4/01/2016 1
B 5/01/2016 1
B 6/01/2016 1
B 7/01/2016
B 8/01/2016
B 9/01/2016
B 10/01/2016
B 11/01/2016 1
B 12/01/2016 1
B 13/01/2016 1
B 14/01/2016 1
B 15/01/2016 1
B 16/01/2016 16/01/2016 1
B 17/01/2016 1
B 18/01/2016 1
B 19/01/2016 1
B 20/01/2016 20/01/2016 1
B 21/01/2016 1
B 22/01/2016
B 23/01/2016
B 24/01/2016
B 25/01/2016
答案 0 :(得分:1)
MABO:
下面是一些示例数据和SQL。检查输出时,我想你会看到“被污染”(spoiled)的情况——即在某个标记窗口内存在多个未来公告的日期。
按事件日期标记交易日期,本质上是一个内连接问题。需要为每个待计算的标志各做一次内连接,再把这些内连接的结果左连接到交易数据上,才能得到想要的结果(“want”)。
data trading;
  /* Sample trading calendar: one row per group (1 to 4) for each of the
     1001 consecutive days that end today. */
  do group = 1 to 4;
    do date = today() - 1000 to today();
      output;
    end;
  end;
  /* declared after the loops so that group stays the first variable */
  format date yymmdd10.;
run;
data announcement;
  /* Sample announcements: for each group (1 to 4) and each of the 1001 days
     that end today, keep the day when the uniform draw is below 0.01. */
  do group = 1 to 4;
    do date = today() - 1000 to today();
      /* exactly one draw per candidate day, same stream (seed 123) */
      if ranuni(123) >= 0.01 then continue;
      output;
    end;
  end;
  /* declared after the loops so that group stays the first variable */
  format date yymmdd10.;
run;
proc sql;
  /*
   * Flag each trading row by how far it lies from announcements of the
   * same group:
   *   P0_flag : an announcement fell on the trading date or the day before
   *   P1_flag : an announcement falls 1 to 5 days after the trading date
   *   P2_flag : an announcement falls 11 to 60 days after the trading date
   *
   * P0 and P1 are reduced to one row per (group, date) with DISTINCT, so
   * several announcements inside the same window cannot duplicate a trading
   * row with identical content.
   * P2 deliberately keeps one row per matching announcement, because
   * P2_date reports which announcement matched; trading rows with several
   * announcements 11 to 60 days ahead therefore still appear more than once.
   */
  create table trading_pre_announce_flagged as
  select
      trading.*
    , announcement.date as annouce_date  /* spelling kept so the output column name is unchanged */
    , case when P0.date is not null then 1 else . end as P0_flag label="Announcement was today or yesterday"
    , case when P1.date is not null then 1 else . end as P1_flag label="Announcement in 1 to 5 days"
    , case when P2.date is not null then 1 else . end as P2_flag label="Announcement in 11 to 60 days"
    , case when P2.date is not null then P2.adate else . end as P2_date label="Date of Announcement in 11 to 60 days" format=yymmdd10.
  from
    trading
  /* announcement made on the trading date itself, if any */
  left join
    announcement
      on announcement.date = trading.date and announcement.group = trading.group
  /* trading dates with an announcement on the same day or the day before */
  left join
    ( select distinct trading.group, trading.date
      from trading
      inner join
        announcement
          on announcement.group = trading.group
          and announcement.date - trading.date between -1 and 0
    ) as P0
      on P0.date = trading.date and P0.group = trading.group
  /* trading dates with an announcement 1 to 5 days ahead */
  left join
    ( select distinct trading.group, trading.date
      from trading
      inner join
        announcement
          on announcement.group = trading.group
          and announcement.date - trading.date between 1 and 5
    ) as P1
      on P1.date = trading.date and P1.group = trading.group
  /* trading dates with an announcement 11 to 60 days ahead, one row per announcement */
  left join
    ( select trading.group, trading.date, announcement.date as adate
      from trading
      inner join
        announcement
          on announcement.group = trading.group
          and announcement.date - trading.date between 11 and 60
    ) as P2
      on P2.date = trading.date and P2.group = trading.group
  order
    by trading.group, trading.date
  ;
quit;
OP曾在某处(虽然现在找不到了)提到,要处理约750家公司和总共500个事件,而SQL方案似乎运行时间很长。
另一种选择是DATA Step。
500个事件的基数足够小,可以用分组和日期两个数组把事件存起来以供查找。对已排序的事件巧妙地跟踪数组下标,就能以最少的扫描来评估规则并按条件设置标志。
例如:
data trading;
  /* Larger sample trading calendar: one row per group (1 to 700) for each
     of the 1001 consecutive days that end today. */
  do group = 1 to 700;
    do date = today() - 1000 to today();
      output;
    end;
  end;
  /* declared after the loops so that group stays the first variable */
  format date yymmdd10.;
run;
data announcement;
  /* Sample events numbered 1 to 500. Each event gets a random group
     (1 to 700) and a random date within the 1000 days that end today.
     When the event number leaves remainder 1 modulo 20, the event is
     followed by two more events of the same group, each 30 to 129 days
     after the previous one; those follow-ups use up the next two numbers. */
  do eventid = 1 to 500;
    group = ceil(700 * ranuni(123));
    date  = (today() - 1000) + ceil(1000 * ranuni(123));
    if mod(eventid, 20) = 1 then do;
      do follow_up = 1 to 2;
        output;                 /* write the current event of the cluster */
        eventid = eventid + 1;  /* consume the next event number */
        date = date + 30 + floor(100 * ranuni(123));
      end;
    end;
    output;  /* single event, or the last event of a cluster */
  end;
  format date yymmdd10.;
  drop follow_up;  /* helper counter only, not part of the data */
run;
/* Sort the events in place by group, then by date within each group. */
proc sort data=announcement out=announcement;
  by group date;
run;
/* Store the number of announcement rows plus one in macro variable TOP. */
data _null_;
  /* The SET never executes; it is present only so that NOBS= is filled in
     when the step is compiled. */
  if 0 then set announcement nobs=nobs;
  call symputx('top', nobs + 1);
  /* No SET statement ever runs here, so end the step explicitly rather than
     leave termination to the looping detection of SAS. */
  stop;
run;
data marked_trading;
  /*
   * Flag each trading row by its distance in days to the events of its group:
   *   p3_flag : an event lies 11 to 60 days ahead
   *   p2_flag : an event lies 1 to 5 days ahead
   *   p1_flag : an event fell on the trading date or the day before
   *
   * All events are loaded once into temporary arrays, and every trading row
   * scans only the slice eix0 to eix1 that belongs to its group. This relies
   * on announcement being sorted by group and date and on trading arriving
   * in ascending group order (BY group). Dates are presumably ascending
   * within each group too, which is what allows expired events to be
   * skipped for good - confirm for real data.
   */
  array e_group(0:&TOP) _temporary_;
  array e_date (0:&TOP) _temporary_;

  * load event array;
  do _n_ = 1 by 1 until (last_announcement);
    set announcement end=last_announcement;
    e_group(_n_) = group;
    e_date(_n_) = date;
  end;

  * scan positions start at the first loaded event;
  eix0 = 1;
  eix1 = 1;

  e_group(0) = 0; * low sentinel;
  /* High sentinel goes into the spare slot at the upper bound of the array.
     After the DO UNTIL loop above, _n_ still holds the index of the last
     loaded event, so writing the sentinel to e_group(_n_) would overwrite
     that event and it would never be flagged. */
  e_group(hbound(e_group)) = 1e9;

  * evaluate flagging criteria for each trade group date;
  do _n_ = 1 by 1 until (last_trading);
    set trading end=last_trading;
    by group;

    if first.group then do;
      * discover indices of events associated with the group;
      do eix0 = eix0 by 1 while (e_group(eix0) < group); end;
      do eix1 = eix0 by 1 while (e_group(eix1) = group); end; eix1 = eix1 - 1;
      * differs from group when the group has no events at all;
      eix_group = e_group(eix0);
    end;

    p3_flag = .; p2_flag = .; p1_flag = .;

    if group = eix_group then do;
      * NOTE: bounds are evaluated only at loop initialization;
      * so moving eix0 inside the loop does not disturb the current scan;
      * evaluate events for flagging a trade;
      do ix = eix0 to eix1;
        days_to_event = e_date(ix) - date;
        if not p3_flag then if 11 <= days_to_event <= 60 then p3_flag = 1;
        if not p2_flag then if 1 <= days_to_event <= 5 then p2_flag = 1;
        if not p1_flag then if -1 <= days_to_event <= 0 then p1_flag = 1;
        if days_to_event <= -1 then eix0 = ix+1; * update when applicability exhausted;
      end;
    end;

    output;
  end;

  keep group date p:;
  * both inputs were read inside explicit loops, so end the step here;
  stop;
run;