在语言R中,我有以下两个数据框
sref_df
unit ft event_time cum_ft
7215 165755 0.0000000 01/03/2014 10:29:13 0.000000
7214 165755 0.0000000 01/06/2014 17:13:45 0.000000
7774 165755 0.0000000 01/09/2014 11:17:06 0.000000
8581 165755 0.0000000 01/10/2014 12:12:29 0.000000
10326 165755 1.2624167 01/10/2014 13:50:54 1.262417
7219 165755 1.0894306 01/10/2014 16:40:38 2.351847
7216 165755 0.0000000 01/11/2014 11:43:24 2.351847
2221 165755 0.0000000 01/12/2014 12:52:53 2.351847
1832 165755 1.0176389 01/13/2014 07:56:00 3.369486
1528 165755 0.9430278 01/13/2014 16:22:43 4.312514
event_df
unit event_time
8642 165755 01/03/2014 10:30:01
8643 165755 01/03/2014 10:31:01
8641 165755 01/06/2014 17:14:44
9318 165755 01/09/2014 11:17:49
10257 165755 01/10/2014 12:13:23
12333 165755 01/10/2014 13:51:48
8647 165755 01/10/2014 16:41:30
8644 165755 01/11/2014 11:44:06
2806 165755 01/12/2014 12:53:46
2292 165755 01/13/2014 07:56:54
参考数据框(sref_df)包含多个不同的 unit 值,而事件数据框(event_df)只有一个 unit。参考数据框已先按 unit、再按 event_time 排序。对于事件数据框中的每一行,需要从参考数据框中提取 cum_ft:取参考数据框中 event_time 小于或等于该事件的 event_time、且与之最接近的那一行。最后将提取的 cum_ft 添加到 event_df 中。
我尝试了下面的代码,但它无法运行。我不知道该怎么写 "irow =" 那一行。
# Reference readings, built in a single vectorised call instead of growing the
# frame with repeated rbind() calls.
#   unit       - identifier of the unit the reading belongs to
#   ft         - value recorded at this reading
#   event_time - timestamp of the reading (POSIXct, local time zone)
#   cum_ft     - running total of ft up to and including this reading
# event_time is stored as POSIXct because the readings carry a time of day;
# a Date column would discard it.
bref_df <- data.frame(
  unit = rep(165755, 10L),
  ft = c(
    0.0000000, 0.0000000, 0.0000000, 0.0000000, 1.2624167,
    1.0894306, 0.0000000, 0.0000000, 1.0176389, 0.9430278
  ),
  event_time = as.POSIXct(
    c(
      "01/03/2014 10:29:13",
      "01/06/2014 17:13:45",
      "01/09/2014 11:17:06",
      "01/10/2014 12:12:29",
      "01/10/2014 13:50:54",
      "01/10/2014 16:40:38",
      "01/11/2014 11:43:24",
      "01/12/2014 12:52:53",
      "01/13/2014 07:56:00",
      "01/13/2014 16:22:43"
    ),
    format = "%m/%d/%Y %H:%M:%S"
  ),
  cum_ft = c(
    0.000000, 0.000000, 0.000000, 0.000000, 1.262417,
    2.351847, 2.351847, 2.351847, 3.369486, 4.312514
  ),
  stringsAsFactors = FALSE
)
# Events to look up, built in a single vectorised call instead of growing the
# frame with repeated rbind() calls.
#   unit       - identifier of the unit the event belongs to
#   event_time - timestamp of the event (POSIXct, local time zone)
# Only these two columns exist: the events carry no ft value of their own.
eref_df <- data.frame(
  unit = rep(165755, 10L),
  event_time = as.POSIXct(
    c(
      "01/03/2014 10:30:01",
      "01/03/2014 10:31:01",
      "01/06/2014 17:14:44",
      "01/09/2014 11:17:49",
      "01/10/2014 12:13:23",
      "01/10/2014 13:51:48",
      "01/10/2014 16:41:30",
      "01/11/2014 11:44:06",
      "01/12/2014 12:53:46",
      "01/13/2014 07:56:54"
    ),
    format = "%m/%d/%Y %H:%M:%S"
  ),
  stringsAsFactors = FALSE
)
# Sort the reference rows by unit, then chronologically within each unit.
sref_df <- bref_df[with(bref_df, order(unit, event_time)), ]
print(sref_df)

# Keep only the events of the unit of interest, in chronological order.
# The ordering is computed on, and applied to, the same filtered frame.
uUnit <- 165755
event_df <- eref_df[eref_df$unit == uUnit, ]
sevent_df <- event_df[order(event_df$event_time), ]
print(sevent_df)

# Positions (within sref_df) of the reference rows for this unit. Because
# sref_df is sorted by unit and then time, these rows are in ascending time
# order, which findInterval() requires.
ref_rows <- which(sref_df$unit == uUnit)
ref_times <- as.numeric(sref_df$event_time[ref_rows])

# For each event, findInterval() gives the number of reference times that are
# <= the event time, i.e. the index of the latest reference row at or before
# the event. A result of 0 means the event precedes every reference row, so
# there is nothing to match and the lookup yields NA.
pos <- findInterval(as.numeric(sevent_df$event_time), ref_times)
pos[pos == 0L] <- NA_integer_

# matchRow is the row position in sref_df; cum_ft is the value found there.
sevent_df$matchRow <- ref_rows[pos]
sevent_df$cum_ft <- sref_df$cum_ft[sevent_df$matchRow]
print(sevent_df)
所需的输出是
index unit event_time match cum_ft
1 165755 1/3/2014 10:30 1 0
2 165755 1/3/2014 10:31 1 0
3 165755 1/6/2014 17:14 2 0
4 165755 1/9/2014 11:17 3 0
5 165755 1/10/2014 12:13 4 0
6 165755 1/10/2014 13:51 5 1.262417
7 165755 1/10/2014 16:41 6 2.351847
8 165755 1/11/2014 11:44 7 2.351847
9 165755 1/12/2014 12:53 8 2.351847
10 165755 1/13/2014 7:56 9 3.369486
event_df has 24600 rows of search criteria (event_time and unit) to match.
sref_df has 20600 rows containing the event_time and unit to search through for the matching unit and closest prior event_time, in order to extract the matching row and cum_ft.
答案 0 :(得分:1)
这是一种方式:
# Seconds elapsed from every reference time to every event time:
# one row per sref_df row, one column per event_df row.
# Units are fixed to seconds so that all columns are comparable (plain
# subtraction of date-times picks the unit automatically on each call).
# Reference rows belonging to a different unit are masked out with NA.
# NOTE: this is a dense nrow(sref_df) x nrow(event_df) matrix, so memory use
# grows with the product of the two row counts.
diff_matrix <- vapply(
  seq_along(event_df$event_time),
  function(i) {
    gaps <- as.numeric(
      difftime(event_df$event_time[i], sref_df$event_time, units = "secs")
    )
    gaps[!(sref_df$unit %in% event_df$unit[i])] <- NA
    gaps
  },
  numeric(nrow(sref_df))
)
# vapply() returns a plain vector when sref_df has a single row; force the
# matrix shape so column indexing below always works.
dim(diff_matrix) <- c(nrow(sref_df), nrow(event_df))

# A negative gap means the reference row is later than the event, so it can
# never be the closest prior row.
diff_matrix[diff_matrix < 0] <- NA

# For each event pick the reference row with the smallest non-negative gap.
# Ties (duplicate reference times) resolve to the last such row, and an event
# with no prior reference row gets NA instead of a zero-length result.
event_df$cum_ft <- sref_df$cum_ft[vapply(
  seq_len(ncol(diff_matrix)),
  function(j) {
    gaps <- diff_matrix[, j]
    if (all(is.na(gaps))) {
      return(NA_integer_)
    }
    hits <- which(gaps == min(gaps, na.rm = TRUE))
    hits[length(hits)]
  },
  integer(1)
)]
# unit event_time cum_ft
#8642 165755 2014-01-03 10:30:01 0.000000
#8643 165755 2014-01-03 10:31:01 0.000000
#8641 165755 2014-01-06 17:14:44 0.000000
#9318 165755 2014-01-09 11:17:49 0.000000
#10257 165755 2014-01-10 12:13:23 0.000000
#12333 165755 2014-01-10 13:51:48 1.262417
#8647 165755 2014-01-10 16:41:30 2.351847
#8644 165755 2014-01-11 11:44:06 2.351847
#2806 165755 2014-01-12 12:53:46 2.351847
#2292 165755 2014-01-13 07:56:54 3.369486
您可以按如下方式添加所需输出中的 match 列:
# Row of sref_df matched to each event (diff_matrix has one column per event,
# with NA marking reference rows that are not eligible).
# vapply() guarantees an integer vector: ties resolve to the last matching
# row, and an event with no eligible reference row gets NA.
event_df$match <- vapply(
  seq_len(ncol(diff_matrix)),
  function(j) {
    gaps <- diff_matrix[, j]
    if (all(is.na(gaps))) {
      return(NA_integer_)
    }
    hits <- which(gaps == min(gaps, na.rm = TRUE))
    hits[length(hits)]
  },
  integer(1)
)