从具有最接近或等于目标日期的日期的行中提取值

时间:2014-08-22 21:05:35

标签: r row closest

在语言R中,我有以下两个数据框

sref_df

        unit        ft          event_time   cum_ft
7215  165755 0.0000000 01/03/2014 10:29:13 0.000000
7214  165755 0.0000000 01/06/2014 17:13:45 0.000000
7774  165755 0.0000000 01/09/2014 11:17:06 0.000000
8581  165755 0.0000000 01/10/2014 12:12:29 0.000000
10326 165755 1.2624167 01/10/2014 13:50:54 1.262417
7219  165755 1.0894306 01/10/2014 16:40:38 2.351847
7216  165755 0.0000000 01/11/2014 11:43:24 2.351847
2221  165755 0.0000000 01/12/2014 12:52:53 2.351847
1832  165755 1.0176389 01/13/2014 07:56:00 3.369486
1528  165755 0.9430278 01/13/2014 16:22:43 4.312514

event_df

        unit          event_time
8642  165755 01/03/2014 10:30:01
8643  165755 01/03/2014 10:31:01
8641  165755 01/06/2014 17:14:44
9318  165755 01/09/2014 11:17:49
10257 165755 01/10/2014 12:13:23
12333 165755 01/10/2014 13:51:48
8647  165755 01/10/2014 16:41:30
8644  165755 01/11/2014 11:44:06
2806  165755 01/12/2014 12:53:46
2292  165755 01/13/2014 07:56:54

Ref具有不同的单位值,事件只有一个单位 Ref已按单位排序,然后按event_time排序 对于事件数据框中的每一行 从参考数据框中提取cum_ft 其中参考数据帧中的event_time最接近事件数据帧中的event_time或等于event_time。 将提取的cum_ft添加到event_df

我正在尝试以下操作,但不会运行。我不知道怎么写" irow ="线。

bref_df <- data.frame(unit=integer(),ft=double(),
             event_time=as.Date(character()),
             cum_ft=double(),
             stringsAsFactors=FALSE) 
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/03/2014 10:29:13',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/06/2014 17:13:45',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/09/2014 11:17:06',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/10/2014 12:12:29',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 1.2624167, 
  event_time=strptime('01/10/2014 13:50:54',format='%m/%d/%Y %H:%M:%S'), cum_ft = 1.262417))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 1.0894306, 
  event_time=strptime('01/10/2014 16:40:38',format='%m/%d/%Y %H:%M:%S'), cum_ft = 2.351847))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/11/2014 11:43:24',format='%m/%d/%Y %H:%M:%S'), cum_ft = 2.351847))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/12/2014 12:52:53',format='%m/%d/%Y %H:%M:%S'), cum_ft = 2.351847))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 1.0176389, 
  event_time=strptime('01/13/2014 07:56:00',format='%m/%d/%Y %H:%M:%S'), cum_ft = 3.369486))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.9430278, 
  event_time=strptime('01/13/2014 16:22:43',format='%m/%d/%Y %H:%M:%S'), cum_ft = 4.312514))

eref_df <- data.frame(unit=integer(),ft=double(),
                 event_time=as.Date(character()),
                 stringsAsFactors=FALSE) 
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/03/2014 10:30:01',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/03/2014 10:31:01',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/06/2014 17:14:44',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/09/2014 11:17:49',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/10/2014 12:13:23',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/10/2014 13:51:48',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/10/2014 16:41:30',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/11/2014 11:44:06',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/12/2014 12:53:46',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/13/2014 07:56:54',format='%m/%d/%Y %H:%M:%S')))

sref_df<-bref_df[with(bref_df, order(unit, event_time)), ]
print(sref_df)

uUnit = 165755
event_df=eref_df[eref_df$unit==uUnit,]
sevent_df=eref_df[with(event_df, order(event_time)), ]
print(sevent_df)

for (iTime in seq(sevent_df$event_time)) {
  aTime = sevent_df$event_time[iTime]
  irow = which(max(sref_df$event_time[sref_df$event_time<=aTime]))
  sevent_df$matchRow[iTime] = irow 
  sevent_df$cum_ft[iTime] = sref_df$cum_ft[irow]
}

print(sevent_df)

所需的输出是

index   unit    event_time  match   cum_ft
1   165755  1/3/2014 10:30  1   0
2   165755  1/3/2014 10:31  1   0
3   165755  1/6/2014 17:14  2   0
4   165755  1/9/2014 11:17  3   0
5   165755  1/10/2014 12:13 4   0
6   165755  1/10/2014 13:51 5   1.262417
7   165755  1/10/2014 16:41 6   2.351847
8   165755  1/11/2014 11:44 7   2.351847
9   165755  1/12/2014 12:53 8   2.351847
10  165755  1/13/2014 7:56  9   3.369486

event_df has 24600 rows of search criteria (event_time and unit) to match.
sref_df has 20600 rows containing the event_time and unit to search through for the matching unit and closest prior event_time in order to to extract the matching row and cum_ft

1 个答案:

答案 0 :(得分:1)

这是一种方式:

diff_matrix <- sapply(event_df$event_time, function(x) x-sref_df$event_time)
diff_matrix[diff_matrix < 0] <- NA

event_df$cum_ft <- 
  sref_df$cum_ft[apply(diff_matrix, 2, function(x) which(x == min(x, na.rm=TRUE)))]

#        unit          event_time   cum_ft
#8642  165755 2014-01-03 10:30:01 0.000000
#8643  165755 2014-01-03 10:31:01 0.000000
#8641  165755 2014-01-06 17:14:44 0.000000
#9318  165755 2014-01-09 11:17:49 0.000000
#10257 165755 2014-01-10 12:13:23 0.000000
#12333 165755 2014-01-10 13:51:48 1.262417
#8647  165755 2014-01-10 16:41:30 2.351847
#8644  165755 2014-01-11 11:44:06 2.351847
#2806  165755 2014-01-12 12:53:46 2.351847
#2292  165755 2014-01-13 07:56:54 3.369486

您可以在所需的输出中添加match列,如下所示:

event_df$match <- apply(diff_matrix, 2, function(x) which(x == min(x, na.rm=TRUE)))