我正在尝试将可变长度的向量存储在现有数据框的新列中。
我的初始数据框 -
data -
job_id usetime
abc 2345
abc1 4353
jsdf 34985
我有一个包含数据帧索引的数字向量(indices_excluded)。使用这些数字我必须提取相应索引的使用时间。
我想在名为 "runtime_excluded" 的新列中存储与行索引相对应的所有这些使用时间。
为此,我尝试在循环(对 i 进行迭代)中运行以下代码:
# Attempt from the question: wrap the selected use times in a list and assign
# them to the single cell in row i. As reported in the surrounding text, this
# raises a warning and only the first element ends up stored.
data[i, "runtime_excluded"]<-I(list(data[indices_excluded, "USETIME"]))
数字向量 "indices_excluded" 在每次迭代中都会发生变化。
这会产生一条警告:
value = list(: 替换元素1有2行替换1行
它只存储了列表的第一个元素。我想将所有使用时间都存储在该数据框单元格中。期望的输出 -
data -
job_id usetime runtime_excluded
abc 2345 234,4325
abcd 4353 2435
abcde 34985 2134, 234234, 34223
我查看了一些相关问题,例如 one、two、three,但没能找到我这个问题的答案。
编辑 -
我的初始数据框 -
data -
job_id starttime endtime endtime_modified usetime
abc 1 23 20 22
abc1 2 15 13 13
jsdf 30 40 39 10
我正在运行的代码 -
# Code from the question, kept verbatim. For every row i it derives two index
# sets from the time columns, stores their sizes, and tries to store the use
# times of the rows that are in the first set but not in the second.
k=nrow(data)
for(i in 1:k)
{
# Rows whose [starttime, endtime] range contains row i's endtime.
# NOTE(review): the which( call spanning the next two lines is never closed,
# so the snippet does not parse as written.
indices_peak<-which((data[i,"endtime"] >= data$starttime)
& (data[i,"endtime"] <= data$endtime)
# Rows whose [starttime, endtime_modified] range contains row i's
# endtime_modified.
# NOTE(review): same unclosed which( call here.
indices_peak95<-which((data[i,"endtime_modified"] >= data$starttime)
& (data[i,"endtime_modified"] <= data$endtime_modified)
# Indices present in indices_peak but absent from indices_peak95.
indices_excluded<-indices_peak[!indices_peak %in% indices_peak95]
# Record the size of each index set for row i.
data[i,"peak"]<-length(indices_peak)
data[i,"peak_95"]<-length(indices_peak95)
# NOTE(review): the sample table names this column `usetime`; R column names
# are case-sensitive, so "USETIME" presumably does not match - confirm.
# NOTE(review): single-bracket `[i]` targets one slot, so a multi-element
# right-hand side presumably cannot be stored whole - confirm.
data$runtime_excluded[i]<-data[indices_excluded, "USETIME"]
}
期望的输出 -
job_id starttime endtime endtime_modified usetime peak peak_95 runtime_excluded
abc 1 24 22 22 2 2 20
abc1 2 24 20 22 2 3 -
jsdf 3 23 23 9 3 1 22,20
开始时间和结束时间以秒为单位,并指向特定时间。
答案 0 :(得分:0)
不确定我的理解是否正确,不过下面的示例与这里(here)建议的做法非常相似:
# Initial data frame from the question.
data <- data.frame(
  job_id = c("abc", "abc1", "jsdf"),
  usetime = c(2345, 4353, 34985)
)
# Pre-allocate runtime_excluded as a list column (one NULL slot per row) so
# that each row can later hold a vector of arbitrary length.
data$runtime_excluded <- vector(mode = "list", length = nrow(data))
# > data
# job_id usetime runtime_excluded
# 1 abc 2345 NULL
# 2 abc1 4353 NULL
# 3 jsdf 34985 NULL
# Fill the list column row by row. Double brackets are required so that the
# whole vector is stored as a single list element of row i.
# seq_len(nrow(data)) follows the actual row count instead of a hard-coded 3.
for (i in seq_len(nrow(data))) {
  data$runtime_excluded[[i]] <- seq_len(i)
  # or, similarly:
  # data[["runtime_excluded"]][[i]] <- seq_len(i)
}
# > data
# job_id usetime runtime_excluded
# 1 abc 2345 1
# 2 abc1 4353 1, 2
# 3 jsdf 34985 1, 2, 3
编辑:
这是您的代码的工作版本:
# Sample data matching the edited question.
data <- data.frame(
  job_id = c("abc", "abc1", "jsdf"),
  starttime = c(1, 2, 3),
  endtime = c(24, 24, 23),
  endtime_modified = c(22, 20, 23),
  usetime = c(22, 22, 9)
)
# > data
# job_id starttime endtime endtime_modified usetime
# 1 abc 1 24 22 22
# 2 abc1 2 24 20 22
# 3 jsdf 3 23 23 9
k <- nrow(data)
# Pre-allocate every result column up front: runtime_excluded as a list column
# (one slot per row, each able to hold a vector of any length), peak and
# peak_95 as integer counts.
data$runtime_excluded <- vector(mode = "list", length = k)
data$peak <- integer(k)
data$peak_95 <- integer(k)
# seq_len(k) yields an empty sequence for a zero-row data frame, whereas 1:k
# would iterate over c(1, 0).
for (i in seq_len(k)) {
  # Rows whose [starttime, endtime] range contains row i's endtime.
  indices_peak <- which(
    data$endtime[i] >= data$starttime & data$endtime[i] <= data$endtime
  )
  # Rows whose [starttime, endtime_modified] range contains row i's
  # endtime_modified.
  indices_peak95 <- which(
    data$endtime_modified[i] >= data$starttime &
      data$endtime_modified[i] <= data$endtime_modified
  )
  # Rows counted in the first set but not in the second.
  indices_excluded <- indices_peak[!indices_peak %in% indices_peak95]
  data$peak[i] <- length(indices_peak)
  data$peak_95[i] <- length(indices_peak95)
  # An empty selection yields a zero-length numeric vector, never NULL, so no
  # guard is needed: `[[<-` stores it as an empty element (only assigning NULL
  # would drop the list element).
  data$runtime_excluded[[i]] <- data$usetime[indices_excluded]
}
# > data
# job_id starttime endtime endtime_modified usetime runtime_excluded peak peak_95
# 1 abc 1 24 22 22 22 2 2
# 2 abc1 2 24 20 22 2 3
# 3 jsdf 3 23 23 9 22, 22 3 1
答案 1 :(得分:0)
这个方法对我有效:
# Alternative: collapse the selected use times into one comma-separated string
# and store that in row i. The cell then holds character data rather than a
# vector of the original values.
data[i, "runtime_excluded"]<- paste(data[indices_excluded, "USETIME"], collapse=",")