如何在R中的空数据框中添加行

时间:2015-02-14 16:46:12

标签: r

我已经尝试了各种方法,但仍然无法向空数据框中添加行。我发现第一行可以成功添加,但从第二行开始直到最后一行,都会出现这个错误:invalid factor level, NA generated(无效的因子水平,生成了NA)。希望你们能帮帮我!非常感谢!

   # Snippet from the question, kept verbatim: it grows a data frame one row
   # at a time, starting from a data frame with no rows and no columns.
   Table <- data.frame()

   # Visit every element of dfMoviesList by position.
   for (i in 1:length(dfMoviesList)){

       # Label for the i-th element: "DF1", "DF2", ...
       ID = paste0("DF",i)
       # First entry of the TITULO column of the i-th element.
       Value = (dfMoviesList[[i]]$TITULO[1])
       # Append the (ID, Value) pair as a new row. According to the question,
       # the first row is added but later iterations report
       # "invalid factor level, NA generated".
       Table <- rbind(Table,c(ID,Value)) 
   }

2 个答案:

答案 0 :(得分:1)

您可以在运行代码之前设置stringsAsFactors=FALSE

# Switch off automatic string-to-factor conversion for the duration of the
# loop, so the character values appended by rbind() stay character instead
# of becoming factor levels. The previous option values are kept in `op`.
op <- options(stringsAsFactors = FALSE)
Table <- data.frame()
# seq_along() gives an empty sequence for an empty list, whereas
# 1:length(x) would iterate over c(1, 0) and fail on the first lookup.
for (i in seq_along(dfMoviesList)) {
  ID <- paste0("DF", i)
  Value <- dfMoviesList[[i]]$TITUL0[1]
  Table <- rbind(Table, c(ID, Value))
}
options(op) # restore the option values that were in effect before

更新

如果速度有问题,您也可以尝试

# Vectorised alternative: build every ID in one call and extract the first
# TITUL0 value of each data frame, without growing an object inside a loop.
ID <- paste0("DF", seq_along(dfMoviesList))
res <- data.frame(
  ID,
  # numeric(1L) makes vapply() require exactly one numeric value per element.
  Value = vapply(dfMoviesList, function(x) x$TITUL0[1], numeric(1L))
)

数据

# Small reproducible example: three data frames, each with a shuffled 1:5 in
# TITUL0 and five standard-normal draws in val.
set.seed(24)
dfMoviesList <- lapply(seq_len(3L), function(idx) {
  # Draw the permutation before the normals so the RNG stream is consumed
  # in the same order as when both calls sit inside data.frame().
  shuffled <- sample(1:5)
  noise <- rnorm(5)
  data.frame(TITUL0 = shuffled, val = noise)
})

基准

# Benchmark input: 10000 one-column data frames, each holding a random
# permutation of 1:5 in TITUL0. The seed makes the data reproducible.
set.seed(24)
dfMoviesList <- lapply(seq_len(10000L), function(idx) {
  data.frame(TITUL0 = sample(1:5))
})

# Vectorised solution: one ID per element of the global dfMoviesList, paired
# with the first TITUL0 value of that element. Returns a data frame with
# columns ID and Value.
akrun1 <- function() {
  ids <- paste0("DF", seq_along(dfMoviesList))
  first_titles <- vapply(
    dfMoviesList,
    function(movie_df) movie_df$TITUL0[1],
    numeric(1L)
  )
  data.frame(ID = ids, Value = first_titles)
}
#included a data.table solution also
library(data.table)
# data.table solution: stack every element of the global dfMoviesList into
# one table tagged with an id column, then keep the first row of each id.
akrun2 <- function() {
  ids <- paste0("DF", seq_along(dfMoviesList))
  stacked <- rbindlist(setNames(dfMoviesList, ids), idcol = TRUE)
  # .I[1L] within each .id group is the row number of that group's first row.
  first_rows <- stacked[, .I[1L], by = .id]$V1
  stacked[first_rows]
}


# Preallocation strategy: fill a matrix of known size row by row, then
# convert it to a data frame with columns ID and Value. Reads the global
# dfMoviesList and returns the resulting data frame.
dariober <- function() {
  # One row per list element, two columns (ID, Value), initially all NA.
  Table <- matrix(NA, nrow = length(dfMoviesList), ncol = 2)

  # seq_along() is safe for an empty list; 1:length(x) would yield c(1, 0)
  # and fail with a subscript error on the first lookup.
  for (i in seq_along(dfMoviesList)) {
    ID <- paste0("DF", i)
    Value <- dfMoviesList[[i]]$TITUL0[1]
    # c() coerces both entries to character before they are stored.
    Table[i, ] <- c(ID, Value)
  }

  # Return the data frame itself rather than ending on an assignment, which
  # would make the result invisible at the console.
  data.frame(ID = Table[, 1], Value = Table[, 2])
}

 library(microbenchmark)

 # Time the three approaches 20 times each and report the timings relative
 # to one another.
 microbenchmark(
   akrun1(),
   akrun2(),
   dariober(),
   times = 20L,
   unit = "relative"
 )
 #Unit: relative
 #     expr      min       lq     mean   median       uq      max neval cld
 #   akrun1() 2.214390 2.193538 2.055775 2.173440 2.148606 1.615028    20  b 
 #   akrun2() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    20 a  
 # dariober() 3.226717 3.198742 2.984970 3.174609 3.139982 2.189399    20   c

答案 1 :(得分:1)

由于您事先知道最终data.frame中将包含多少行和列,因此先初始化一个大小正确的空矩阵,填充它,然后再将其转换为data.frame,这样要快得多。

在您给出的for循环中,每次迭代都会删除并重新创建Table对象,即使循环次数不算太大,这也会非常慢。例如:

10000部电影的样本:

# Sample input: 10000 one-column data frames, each holding a random
# permutation of 1:5 in TITUL0.
dfMoviesList <- lapply(seq_len(10000L), function(idx) {
  data.frame(TITUL0 = sample(1:5))
})

空矩阵策略:

# Time the preallocation strategy: fill a matrix of known size row by row,
# then convert it to a data frame once at the end.
system.time({
  # One row per list element, two columns (ID, Value), initially all NA.
  Table <- matrix(NA, nrow = length(dfMoviesList), ncol = 2)

  # seq_along() is safe for an empty list; 1:length(x) would yield c(1, 0).
  for (i in seq_along(dfMoviesList)) {
    ID <- paste0("DF", i)
    Value <- dfMoviesList[[i]]$TITUL0[1]
    # c() coerces both entries to character before they are stored.
    Table[i, ] <- c(ID, Value)
  }
  Table <- data.frame(ID = Table[, 1], Value = Table[, 2])
})
   user  system elapsed 
  0.129   0.001   0.130 

比较:

# Time the row-by-row rbind() approach for comparison. Growing the data
# frame inside the loop is kept on purpose: it is the slow baseline.
system.time({
  # Keep appended strings as character; previous option values go in `op`.
  op <- options(stringsAsFactors = FALSE)
  Table <- data.frame()
  # seq_along() is safe for an empty list; 1:length(x) would yield c(1, 0).
  for (i in seq_along(dfMoviesList)) {
    ID <- paste0("DF", i)
    Value <- dfMoviesList[[i]]$TITUL0[1]
    Table <- rbind(Table, c(ID, Value))
  }
  options(op) # restore the option values that were in effect before
})
   user  system elapsed 
 12.316   2.855  15.180