SAS —— 选取 t + 5 分钟时刻的观测值

时间:2018-02-28 16:44:23

标签: time sas

我想根据不同时刻的价格来计算一个公式(enter image description here)。更具体地说,P(t+5) 表示在测量价格之后至少 5 分钟时观察到的第一个价格。

以下代码用于创建代表 P(t+5) 的变量。

/*
 * Build WANT from HAVE: for each row, scan forward through HAVE and copy into
 * new_price the price of the first row that has the same _ric and date_l_ and
 * whose new_time is at least 300 seconds (5 minutes) after the current row's
 * new_time.
 *
 * NOTE(review): the UNTIL condition only makes sense if HAVE is ordered by
 * date_l_ (and by time within a date) - confirm the sort order of HAVE.
 * NOTE(review): new_time is assumed to be a SAS time value in seconds, since
 * 300 is added to it - confirm.
 */
data WANT;
set  HAVE nobs=nobs;
/* Look ahead from the current row; give up once the look-ahead row is on a later date. */
do _i = _n_ to nobs until(other_date > date_l_);
    /* Direct-access read of row _i. Every look-ahead column is renamed to
       other_* so it cannot overwrite the current row's values. int1min is
       deliberately not read here: it was not renamed, so reading it replaced
       the current row's int1min with the look-ahead row's value. */
    set  HAVE(
        rename=(    _ric=other_ric
                    date_l_= other_date 
                    price = other_price 
                    new_time = other_time)
        keep=_ric date_l_ price new_time) 
        point=_i;
    /* Compare the LOOK-AHEAD time (other_time) with the current row's time.
       The previous test compared new_time with itself and could never be true.
       ">=" implements "at least 5 minutes after". */
    if other_ric=_ric and other_date = date_l_ and other_time >= new_time+300 then do;
        new_price = other_price;
        leave;
        end;
    end;
drop other_: ;
run;    

但是,代码始终无法正常运行。如图所示,new_price在绿色矩形中是正确的,但在红色矩形中是不正确的。有谁能帮我解决这个问题?

以下是数据样本。

_RIC    Date_L_ Time_L_ Price   new_price   new_time    time
BAG201310900.U  20130715    9:36:19.721 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:19.721 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:22.751 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:22.751 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:24.400 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:24.400 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:28.150 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:28.150 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:45.099 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:45.099 0.27    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:48.929 0.28    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:48.929 0.28    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:49.929 0.28    0.29    9:36    9:41
BAG201310900.U  20130715    9:36:50.899 0.28    0.29    9:36    9:41
BAG201310900.U  20130715    9:37:04.839 0.27    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:04.839 0.27    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:04.848 0.27    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:07.619 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:11.619 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:11.619 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:11.619 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:12.738 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:15.528 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:30.337 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:32.717 0.28    0.29    9:37    9:42
BAG201310900.U  20130715    9:37:58.636 0.29    0.29    9:37    9:42
BAG201310900.U  20130715    9:38:04.016 0.28    0.29    9:38    9:43
BAG201310900.U  20130715    9:38:07.326 0.28    0.29    9:38    9:43
BAG201310900.U  20130715    9:38:07.849 0.28    0.29    9:38    9:43
BAG201310900.U  20130715    9:38:16.005 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:38:18.055 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:38:18.055 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:38:18.055 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:38:20.025 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:38:21.235 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:38:25.585 0.3 0.29    9:38    9:43
BAG201310900.U  20130715    9:40:01.475 0.29    0.22    9:40    9:45
BAG201310900.U  20130715    9:45:04.335 0.22    0.27    9:45    9:50
BAG201310900.U  20130715    9:45:04.335 0.22    0.27    9:45    9:50
BAG201310900.U  20130715    9:45:04.335 0.22    0.27    9:45    9:50
BAG201310900.U  20130715    9:45:35.966 0.24    0.27    9:45    9:50
BAG201310900.U  20130715    9:51:13.808 0.27    0.19    9:51    9:56
BAG201310900.U  20130715    9:52:41.409 0.27    0.19    9:52    9:57
BAG201310900.U  20130715    9:53:32.730 0.28    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:33.250 0.29    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:36.580 0.26    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:36.580 0.26    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:36.580 0.26    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:36.580 0.26    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:36.580 0.26    0.19    9:53    9:58
BAG201310900.U  20130715    9:53:36.580 0.26    0.19    9:53    9:58
BAG201310900.U  20130715    9:54:00.601 0.25    0.19    9:54    9:59
BAG201310900.U  20130715    9:54:24.842 0.24    0.19    9:54    9:59
BAG201310900.U  20130715    9:57:42.068 0.19    0.24    9:57    10:02
BAG201310900.U  20130715    9:57:42.068 0.19    0.24    9:57    10:02
BAG201310900.U  20130715    9:57:42.068 0.19    0.24    9:57    10:02
BAG201310900.U  20130715    10:02:36.960    0.24    0.26    10:02   10:07
BAG201310900.U  20130715    10:06:46.735    0.26    0.24    10:06   10:11
BAG201310900.U  20130715    10:08:28.588    0.23    0.24    10:08   10:13
BAG201310900.U  20130715    10:09:13.008    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:13.008    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:13.008    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:13.008    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:13.008    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:13.018    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:22.508    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:22.508    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:22.528    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:09:34.628    0.24    0.24    10:09   10:14
BAG201310900.U  20130715    10:10:03.840    0.24    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:04.939    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:04.960    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:04.989    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:06.079    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:06.090    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:06.090    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:08.850    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:08.899    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:08.920    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:10:10.090    0.25    0.24    10:10   10:15
BAG201310900.U  20130715    10:46:08.210    0.24    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:22.842    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    10:46:25.331    0.23    0.22    10:46   10:51
BAG201310900.U  20130715    11:14:40.903    0.22    0.22    11:14   11:19
BAG201310900.U  20130715    11:26:52.196    0.22    0.25    11:26   11:31
BAG201310900.U  20130715    11:44:43.190    0.25    0.27    11:44   11:49
BAG201310900.U  20130715    11:44:43.211    0.25    0.27    11:44   11:49
BAG201310900.U  20130715    11:44:43.211    0.25    0.27    11:44   11:49
BAG201310900.U  20130715    11:44:43.211    0.25    0.27    11:44   11:49
BAG201310900.U  20130715    11:49:14.152    0.27    0.31    11:49   11:54
BAG201310900.U  20130715    12:09:12.418    0.31    0.3 12:09   12:14
BAG201310900.U  20130715    12:09:12.418    0.31    0.3 12:09   12:14
BAG201310900.U  20130715    12:09:12.418    0.31    0.3 12:09   12:14
BAG201310900.U  20130715    12:13:27.376    0.3 0.3 12:13   12:18
BAG201310900.U  20130715    12:14:48.365    0.3 0.3 12:14   12:19
BAG201310900.U  20130715    12:17:28.263    0.3 0.29    12:17   12:22
BAG201310900.U  20130715    12:17:43.893    0.3 0.29    12:17   12:22
BAG201310900.U  20130715    12:48:50.960    0.29    0.29    12:48   12:53
BAG201310900.U  20130715    12:49:59.878    0.29    0.29    12:49   12:54
BAG201310900.U  20130715    12:49:59.878    0.29    0.29    12:49   12:54
BAG201310900.U  20130715    12:49:59.898    0.29    0.29    12:49   12:54
BAG201310900.U  20130715    12:49:59.898    0.29    0.29    12:49   12:54
BAG201310900.U  20130715    12:49:59.898    0.29    0.29    12:49   12:54
BAG201310900.U  20130715    12:49:59.898    0.29    0.29    12:49   12:54
BAG201310900.U  20130715    12:49:59.898    0.29    0.29    12:49   12:54

enter image description here

2 个答案:

答案 0 :(得分:1)

我认为使用随机访问不会是一个很好的解决方案,尤其是反复进行随机访问。一个更好的解决方案可能是按天把数据加载到一个哈希表中(因为看起来你每天都有很多行)。然后使用哈希迭代器找到时间不早于 t + 300 秒的那一行。你没有提供示例数据,因此我无法给出完整的代码,但伪代码大致如下:

/*
 * Pseudocode skeleton: read HAVE by _ric / date_l_ and use a per-day hash
 * table to find the first row at t+300. The hash logic itself is only
 * described in comments and still has to be written.
 */
data want;
  set have;
  by _ric date_l_;

  /* One-time setup on the first iteration. */
  if _n_ = 1 then do;
    /* declare hash table that's empty but has the structure of your have dataset */
    /* declare a hash iterator for that table */
  end;

  /* Start of a new date. */
  if first.date_l_ then do;
    /* load the hash table with that date's rows */
  end;

  /* find the current row in the hash table */
  /* now iterate over the hash table from that row until you get to the end
     or you get a t+300 row */
  /* if you got t+300 row, then you have what you want, otherwise you're too
     far in the day and can stop looking - and probably should tell the data
     step to just skip all of the rest of the records for that day */

  /* End of the date. */
  if last.date_l_ then do;
    /* empty/delete the hash table */
  end;
run;

答案 1 :(得分:0)

  

“更具体地说,P(t + 5) 表示在测量价格之后至少 5 分钟时观察到的第一个价格。”

此示例展示了如何用自反(自)SQL 连接取得并使用最早的未来时间标记所在的行。这种做法要求时间/价格序列中的时间标记互不重复,而样本数据并不满足这一点。该示例仅用于演示。

/*
 * Sample tick data. Each input line holds an instrument code, a date
 * (yyyymmdd), a time of day with milliseconds, and a price.
 * timemark combines the date and the time of day into one datetime value.
 */
data have;
  length   _RIC $20;
  informat Date_L_ yymmdd10.
           Time_L_ time15.3;
  format   Date_L_ yymmdd10.
           Time_L_ time15.3;
  length   price 8;

  infile datalines missover;
  input _RIC Date_L_ Time_L_ Price;

  /* date at midnight plus the time of day in seconds */
  timemark = dhms(date_l_, 0, 0, time_l_);
  format timemark datetime21.3;
datalines;
BAG201310900.U  20130715    9:36:19.721 0.27  
BAG201310900.U  20130715    9:36:19.721 0.27  
BAG201310900.U  20130715    9:36:22.751 0.27  
BAG201310900.U  20130715    9:36:22.751 0.27  
BAG201310900.U  20130715    9:36:24.400 0.27  
BAG201310900.U  20130715    9:36:24.400 0.27  
BAG201310900.U  20130715    9:36:28.150 0.27  
BAG201310900.U  20130715    9:36:28.150 0.27  
BAG201310900.U  20130715    9:36:45.099 0.27  
BAG201310900.U  20130715    9:36:45.099 0.27  
BAG201310900.U  20130715    9:36:48.929 0.28  
BAG201310900.U  20130715    9:36:48.929 0.28  
BAG201310900.U  20130715    9:36:49.929 0.28  
BAG201310900.U  20130715    9:36:50.899 0.28  
BAG201310900.U  20130715    9:37:04.839 0.27  
BAG201310900.U  20130715    9:37:04.839 0.27  
BAG201310900.U  20130715    9:37:04.848 0.27  
BAG201310900.U  20130715    9:37:07.619 0.28  
BAG201310900.U  20130715    9:37:11.619 0.28  
BAG201310900.U  20130715    9:37:11.619 0.28  
BAG201310900.U  20130715    9:37:11.619 0.28  
BAG201310900.U  20130715    9:37:12.738 0.28  
BAG201310900.U  20130715    9:37:15.528 0.28  
BAG201310900.U  20130715    9:37:30.337 0.28  
BAG201310900.U  20130715    9:37:32.717 0.28  
BAG201310900.U  20130715    9:37:58.636 0.29  
BAG201310900.U  20130715    9:38:04.016 0.28  
BAG201310900.U  20130715    9:38:07.326 0.28  
BAG201310900.U  20130715    9:38:07.849 0.28  
BAG201310900.U  20130715    9:38:16.005 0.3 
BAG201310900.U  20130715    9:38:18.055 0.3 
BAG201310900.U  20130715    9:38:18.055 0.3 
BAG201310900.U  20130715    9:38:18.055 0.3 
BAG201310900.U  20130715    9:38:20.025 0.3 
;
run;

重复数据删除

/* Remove rows that are identical in every variable; HAVE is replaced in place. */
proc sort data=have out=have nodupkey;
  by _all_;
run;

自反连接(又称自连接)

/*
 * Reflexive (self) join: for every row of HAVE, find the earliest row of the
 * same _RIC whose timemark is at least the threshold later, and report that
 * future row's timemark and price plus the elapsed interval.
 * Rows with no qualifying future row are kept with missing future values
 * (left join).
 */
proc sql;
  create table want as
  select 
    have._RIC
  , have.timemark
  , have.price
  , future.timemark as timemark_at_5m_threshold
  , future.price as price_at_5m_threshold
  , future.timemark - have.timemark as interval_at_5m_threshold
  from  
    have
  left join
    have as future
  on 
    have._RIC = future._RIC
    /* ">=" so that a row exactly at the threshold qualifies ("at least" N
     * seconds later); the strict ">" skipped such a row.
     * 50 seconds because sample data only covers 2 minutes; use 300 for a
     * real 5 minute threshold.
     */
    and future.timemark >= have.timemark + 50
  group by
    have._RIC, have.timemark
  having
    /* first of all future matches
     * - this is why you want discrete timemarks 
     *   when timemark has dups you would have multiple rows with same min
     *   and replication in result set
     */

    future.timemark = min(future.timemark)  

    /* NOTE: an expression with a non-aggregate reference and an 
     * aggregate reference causes Proc SQL to automatically remerge.
     * That is a good thing. Log will show
     * NOTE: The query requires remerging summary statistics back with the original data.
     */
  ;