Question

我正在尝试将可变长度的向量存储在现有数据框的新列中。

我的初始数据框 -

data - 
job_id usetime
abc    2345
abc1   4353
jsdf   34985

我有一个包含数据框行索引的数字向量（indices_excluded）。我需要用这些索引提取对应行的使用时间（usetime）。

我想在名为 "runtime_excluded" 的新列中，存储与这些行索引相对应的所有使用时间。

为此，我尝试在循环中（对 i 进行迭代）运行以下代码：
# Attempted assignment: wrap the selected values in I(list(...)) and write
# them into row i of the "runtime_excluded" column.
# NOTE(review): "USETIME" is upper-case here, while the sample table spells
# the column "usetime" -- confirm which name the real data uses.
data[i, "runtime_excluded"]<-I(list(data[indices_excluded, "USETIME"]))
数字向量 "indices_excluded" 在每次迭代中都会变化。

这会产生如下警告：

value = list(: 替换元素 1 有 2 行，需要替换 1 行

它只存储了列表的第一个元素。我想将所有使用时间都存储在该数据框元素中。期望的输出 -

data - 
job_id   usetime   runtime_excluded
abc      2345      234,4325
abcd     4353      2435
abcde     34985     2134, 234234, 34223

我查看了一些相关问题（例如 one、two、three），但没能找到我这个问题的答案。

编辑 -

我的初始数据框 -

data - 
job_id starttime  endtime  endtime_modified  usetime
abc    1          23       20                22
abc1   2          15       13                13
jsdf   30         40       39                10

我正在运行的代码 -

# Asker's loop: for every row i, select rows by comparing row i's end times
# against the start/end columns, count the matches, and try to store the
# usetime values of the rows that are in the first selection only.
k=nrow(data)
for(i in 1:k)
{
        # Rows whose starttime <= endtime[i] <= endtime.
        # NOTE(review): this which( call is never closed -- a ")" is missing
        # at the end of the next line, so the snippet does not parse as shown.
        indices_peak<-which((data[i,"endtime"] >= data$starttime) 
                             & (data[i,"endtime"] <= data$endtime)

        # Rows whose starttime <= endtime_modified[i] <= endtime_modified.
        # NOTE(review): same missing closing ")" as above.
        indices_peak95<-which((data[i,"endtime_modified"] >= data$starttime) 
                               & (data[i,"endtime_modified"] <= data$endtime_modified)

        # Indices present in indices_peak but absent from indices_peak95.
        indices_excluded<-indices_peak[!indices_peak %in% indices_peak95]
        # Number of rows in each selection.
        data[i,"peak"]<-length(indices_peak)
        data[i,"peak_95"]<-length(indices_peak95)
        # Tries to put a whole vector into the single position i; per the
        # question text only the first element ends up stored.
        # NOTE(review): "USETIME" is upper-case while the table above names
        # the column "usetime" -- confirm the real column name.
        data$runtime_excluded[i]<-data[indices_excluded, "USETIME"]

}

期望的输出 -

job_id starttime  endtime  endtime_modified  usetime  peak  peak_95  runtime_excluded
abc    1          24       22                22       2     2       20
abc1   2          24       20                22       2     3       -
jsdf   3          23       23                 9       3     1       22,20

开始时间和结束时间以秒为单位，并指向特定时间。

Answer 1

不确定我是否理解正确；无论如何，下面的示例与这里（here）建议的做法非常相似：

# Your initial data.frame.
data <- data.frame(
  job_id = c("abc", "abc1", "jsdf"),
  usetime = c(2345, 4353, 34985)
)

# Initialize runtime_excluded as a list column with one empty slot per row,
# so that every cell can later hold a vector of any length.
data$runtime_excluded <- vector(mode = "list", length = nrow(data))

# > data
#   job_id usetime runtime_excluded
# 1    abc    2345             NULL
# 2   abc1    4353             NULL
# 3   jsdf   34985             NULL

# Example of filling the list column in a for-loop.
# The loop bound is derived from the data frame instead of being hard-coded,
# and `[[<-` (not `[<-`) is used so the whole vector goes into a single cell.
for (i in seq_len(nrow(data))) {
  data$runtime_excluded[[i]] <- seq_len(i)
  # or, similarly:
  # data[["runtime_excluded"]][[i]] <- seq_len(i)
}

# > data
#   job_id usetime runtime_excluded
# 1    abc    2345                1
# 2   abc1    4353             1, 2
# 3   jsdf   34985          1, 2, 3

编辑：

这是您的代码的工作版本：

# Sample data: one row per job. Start and end times are points in time
# expressed in seconds.
data <- data.frame(
  job_id = c("abc", "abc1", "jsdf"),
  starttime = c(1, 2, 3),
  endtime = c(24, 24, 23),
  endtime_modified = c(22, 20, 23),
  usetime = c(22, 22, 9)
)
# > data
#   job_id starttime endtime endtime_modified usetime
# 1    abc         1      24               22      22
# 2   abc1         2      24               20      22
# 3   jsdf         3      23               23       9


# Initialize runtime_excluded as a list column (one slot per row) so that each
# cell can hold a vector of any length, and preallocate the two count columns.
n_rows <- nrow(data)
data$runtime_excluded <- vector(mode = "list", length = n_rows)
data$peak <- rep(NA_integer_, n_rows)
data$peak_95 <- rep(NA_integer_, n_rows)

# seq_len() gives zero iterations for an empty data frame, whereas 1:0 would
# iterate over c(1, 0).
for (i in seq_len(n_rows)) {
  end_i <- data$endtime[i]
  end_mod_i <- data$endtime_modified[i]

  # Rows whose [starttime, endtime] interval contains row i's endtime.
  in_peak <- which(data$starttime <= end_i & end_i <= data$endtime)
  # Rows whose [starttime, endtime_modified] interval contains row i's
  # endtime_modified.
  in_peak95 <- which(
    data$starttime <= end_mod_i & end_mod_i <= data$endtime_modified
  )

  # Rows counted in the first selection but not in the second one.
  excluded <- setdiff(in_peak, in_peak95)

  data$peak[i] <- length(in_peak)
  data$peak_95[i] <- length(in_peak95)

  # `[[<-` stores the whole vector in a single cell. An empty selection yields
  # a zero-length numeric vector (never NULL), which is stored as-is, so no
  # guard is needed here; only assigning NULL would drop the list element.
  data$runtime_excluded[[i]] <- data$usetime[excluded]
}

# > data
#   job_id starttime endtime endtime_modified usetime runtime_excluded peak peak_95
# 1    abc         1      24               22      22               22    2       2
# 2   abc1         2      24               20      22                     2       3
# 3   jsdf         3      23               23       9           22, 22    3       1

Answer 2

这个方法对我有用：

data[i, "runtime_excluded"]<- paste(data[indices_excluded, "USETIME"], collapse=",")

在数据框元素中存储向量

2 个答案: