将data.frame列表中的项目组合到单个data.table中

时间:2013-05-16 14:54:37

标签: r loops dataframe rows data.table

我有一个命名的data.frame列表,其中包含10个项目,每个项目有三列。列表中的每个项目如下所示:

$RandArcBo1
$RandArcBo1$x
 [1]  -97.5 -174.5  156.5 -172.5  111.5 -132.5  142.5  115.5  146.5  171.5  151.5  162.5  168.5 -153.5  129.5  123.5

$RandArcBo1$y
 [1] 86.56732 79.56732 59.56732 77.56732 77.56732 75.56732 58.56732 82.56732 84.56732 74.56732 78.56732 74.56732 76.56732
[14] 77.56732 78.56732 84.56732

$RandArcBo1$Species
[1] "RandArcBo1"

我想将此转换为单个data.table,并重新排列列的顺序。我目前的做法是:

# Question's failing attempt, kept exactly as posted.
# tempList starts out as NULL (c() called with no arguments).
tempList <- c()
for (j in seq_along(dfList)){
   # Reorder the item's elements to Species (3), x (1), y (2) and convert
   # the result to a data.table.
   # NOTE(review): append() concatenates with c(), which presumably splices
   # the data.table's columns into tempList as separate elements instead of
   # adding the table as a single item -- consistent with the rbindlist
   # error reported for this snippet. Confirm by inspecting str(tempList).
   tempList <- append(tempList, as.data.table(c(dfList[[j]][3], dfList[[j]][1], dfList[[j]][2])))
}
# Stack everything with rbindlist() and store the result in a variable whose
# name is the value of `name` followed by "RandPoints".
assign(paste(name, "RandPoints", sep = ""), rbindlist(tempList))

然而,当我运行此操作时,我收到以下错误:

Error in rbindlist(tempList) : 
  Item 1 of list input is not a data.frame, data.table or list

有没有办法解决这个错误并继续使用rbindlist,或者有没有一种以迭代方式向data.table添加行的方法?后一种方法我也尝试过,但没能弄清楚如何创建一个空的data.table来逐行添加。

有关详细信息,请参阅以下完整脚本:

#You will need to collect the following in a folder specified as the working directory:
#   ASCII layers representing the Ms for each species to use as a mask
#   a file named "NumbLoc.csv" with names of ASCII layers and the number of points to be randomly generated in each
require(raster)
require(dismo)
require(SDMTools)
require(data.table)
# Collect the input files from the working directory.
# NOTE(review): list.files() treats `pattern` as a regular expression, so
# "*.asc" is not a strict "ends with .asc" filter -- confirm this is intended.
fileList <- list.files(pattern="*.asc", full.names=TRUE)
# Table with the number of random points to generate per species; the code
# below reads its `Species` and `Number` columns.
numbPoints <- read.csv("NumbLoc.csv", header = TRUE, sep = ",", stringsAsFactors=FALSE)
nPoints<-as.data.frame(numbPoints)
# Loop through the files found above.
for (a in 1:length(fileList)){
  # Characters 3-7 of the path are used as the species code.
  # NOTE(review): presumably paths look like "./ArcBo....asc", so this skips
  # the leading "./" and takes the first five characters of the file name.
  name <- substr(fileList[a], 3, 7 )
  print(name)
  # Read the ASCII file as a raster and store it in a variable named after
  # the species code. The raster serves as a mask for generating random points.
  assign(name,raster(fileList[a]))
  # Start/clear the per-species collection of randomized runs.
  dfList <- c()
  # Generate 10 randomized runs, each with the number of points given for
  # this species in the CSV file.
  for(i in 1:10){
    # Run name: "Rand" + species code + run index.
    nameRand <- paste("Rand", substr(fileList[a], 3, 7),i, sep = "")
    # NOTE(review): wrapping the data frame in c() presumably strips it down
    # to a plain list of columns, which matches the dput() output shown for
    # dfList (no data.frame class on the items).
    dfList[[nameRand]]=c(as.data.frame(randomPoints(get(name), nPoints$Number[nPoints$Species==name])))
  }
  # Add a Species element to each run holding the run's name, then try to
  # convert each run to a data.table with the columns ordered Species, x, y.
  tempList <- c()
  for (j in seq_along(dfList)){
    dfList[[j]]$Species <- names(dfList[j])
    #assign(tempListName, as.data.table(c(dfList[[j]][3], dfList[[j]][1], dfList[[j]][2])))
    # NOTE(review): append() concatenates with c(), which presumably splices
    # the data.table's columns into tempList rather than adding the table as
    # one item; this looks like the source of the rbindlist error.
    tempList <- append(tempList, as.data.table(c(dfList[[j]][3], dfList[[j]][1], dfList[[j]][2])))
  }
  # Stack all runs and store them in a variable named "<name>RandPoints".
  assign(paste(name, "RandPoints", sep = ""), rbindlist(tempList))
}

数据框如下所示:

> dput(head(dfList))
structure(list(RandArcBo1 = structure(list(x = c(-97.5, -174.5, 
156.5, -172.5, 111.5, -132.5, 142.5, 115.5, 146.5, 171.5, 151.5, 
162.5, 168.5, -153.5, 129.5, 123.5), y = c(86.567321777, 79.567321777, 
59.567321777, 77.567321777, 77.567321777, 75.567321777, 58.567321777, 
82.567321777, 84.567321777, 74.567321777, 78.567321777, 74.567321777, 
76.567321777, 77.567321777, 78.567321777, 84.567321777), Species = "RandArcBo1"), .Names = c("x", 
"y", "Species")), RandArcBo2 = structure(list(x = c(170.5, -160.5, 
150.5, 165.5, 78.5, 74.5, -161.5, -129.5, -134.5, -164.5, 166.5, 
-169.5, -156.5, -163.5, 89.5, 150.5), y = c(86.567321777, 76.567321777, 
57.567321777, 70.567321777, 77.567321777, 78.567321777, 74.567321777, 
74.567321777, 72.567321777, 80.567321777, 84.567321777, 85.567321777, 
78.567321777, 80.567321777, 76.567321777, 83.567321777), Species = "RandArcBo2"), .Names = c("x", 
"y", "Species")), RandArcBo3 = structure(list(x = c(179.5, 149.5, 
-153.5, 129.5, 158.5, 169.5, 131.5, -147.5, -140.5, 128.5, 173.5, 
-172.5, -177.5, -91.5, -143.5, 87.5), y = c(63.567321777, 57.567321777, 
72.567321777, 80.567321777, 50.567321777, 58.567321777, 76.567321777, 
72.567321777, 72.567321777, 81.567321777, 58.567321777, 88.567321777, 
88.567321777, 72.567321777, 73.567321777, 82.567321777), Species = "RandArcBo3"), .Names = c("x", 
"y", "Species")), RandArcBo4 = structure(list(x = c(114.5, 156.5, 
76.5, 171.5, -137.5, -140.5, -142.5, 135.5, 152.5, -136.5, -167.5, 
-131.5, 94.5, 154.5, -78.5, -124.5), y = c(83.567321777, 80.567321777, 
78.567321777, 57.567321777, 69.567321777, 73.567321777, 80.567321777, 
84.567321777, 52.567321777, 70.567321777, 67.567321777, 70.567321777, 
85.567321777, 85.567321777, 84.567321777, 81.567321777), Species = "RandArcBo4"), .Names = c("x", 
"y", "Species")), RandArcBo5 = structure(list(x = c(-162.5, 79.5, 
179.5, 166.5, 81.5, 115.5, 155.5, 84.5, 163.5, 166.5, 178.5, 
-119.5, -157.5, 128.5, 118.5, 164.5), y = c(77.567321777, 82.567321777, 
74.567321777, 55.567321777, 81.567321777, 81.567321777, 52.567321777, 
75.567321777, 51.567321777, 73.567321777, 61.567321777, 75.567321777, 
77.567321777, 81.567321777, 84.567321777, 55.567321777), Species = "RandArcBo5"), .Names = c("x", 
"y", "Species")), RandArcBo6 = structure(list(x = c(-148.5, 123.5, 
-130.5, 164.5, -129.5, 168.5, -106.5, 144.5, 166.5, -127.5, -135.5, 
96.5, -95.5, 76.5, -99.5, -144.5), y = c(71.567321777, 78.567321777, 
85.567321777, 70.567321777, 76.567321777, 79.567321777, 85.567321777, 
58.567321777, 56.567321777, 78.567321777, 74.567321777, 76.567321777, 
86.567321777, 73.567321777, 82.567321777, 75.567321777), Species = "RandArcBo6"), .Names = c("x", 
"y", "Species"))), .Names = c("RandArcBo1", "RandArcBo2", "RandArcBo3", 
"RandArcBo4", "RandArcBo5", "RandArcBo6"))

2 个答案:

答案 0 :(得分:3)

这是您当前问题的解决方案,但我建议您重新考虑您的初始数据结构:

 # Convert every item of dfList to a data.table, then stack the tables row-wise.
 rbindlist(lapply(dfList, function(item) as.data.table(item)))

答案 1 :(得分:2)

下面是我对理清这个问题的尝试。不过你确实把事情弄得太复杂了。

#read.csv returns a data frame. No need for this.
nPoints<-as.data.frame(numbPoints)

# For every ASCII mask file: read it as a raster, generate 10 sets of random
# points inside it, tag each set with its run name, and store the combined
# result in a variable named "<species code>RandPoints".
# seq_along() (rather than 1:length()) makes the loop a no-op when no files
# were found.
for (a in seq_along(fileList)) {
  # Characters 3-7 of the path are used as the species code.
  name <- substr(fileList[a], 3, 7)
  print(name)
  # Read the ASCII file as a raster; it serves as the mask for the random
  # points. It is still stored under the species code, as in the original
  # script, but used directly below instead of being fetched back with get().
  maskRaster <- raster(fileList[a])
  assign(name, maskRaster)
  # Number of points for this species; constant across the 10 runs.
  nRand <- nPoints$Number[nPoints$Species == name]
  # One data frame per randomized run.
  # If it's supposed to be a list, don't use c()!
  dfList <- list()
  for (i in seq_len(10)) {
    nameRand <- paste0("Rand", name, i)
    #Again, don't use c()!
    dfList[[nameRand]] <- as.data.frame(randomPoints(maskRaster, nRand))
  }
  # Add a Species column holding the run name and move it to the front.
  tempList <- vector("list", length(dfList))
  for (j in seq_along(dfList)) {
    dfList[[j]]$Species <- names(dfList)[j]
    # Store by index: append() would concatenate with c() and splice the data
    # frame's columns into the list instead of adding it as one element.
    # No need to coerce it _again_ if it's already a data frame. Just
    # reorder the columns the easy way.
    tempList[[j]] <- dfList[[j]][, c(3, 1, 2)]
  }
  # Stack the runs row-wise into a single data frame.
  assign(paste0(name, "RandPoints"), do.call(rbind, tempList))
}

更一般地说,您的大多数麻烦来自于使用assign、get之类的函数。在R中几乎不应该使用它们,也确实没有必要。例如,脚本的第一部分基本上可以用4行完成:

#Loop over file list, calling raster on each
rasterList <- lapply(fileList, raster)
#Assign all the names at once.
names(rasterList) <- substr(fileList, 3, 7)

# Look up each raster's point count by species code so the counts line up
# with the rasters regardless of the row order in nPoints. Fail early if a
# raster has no matching row.
idx <- match(names(rasterList), nPoints$Species)
stopifnot(!anyNA(idx))
# Map() is mapply() with SIMPLIFY = FALSE: it always returns a list with one
# element per raster, whereas mapply() may collapse equal-sized results into
# a single matrix/array.
dfList <- Map(randomPoints, mask = rasterList, n = nPoints$Number[idx])
names(dfList) <- paste0("Rand", names(rasterList))

此时,我只需将数据帧重新组合成一个:

# Number of rows contributed by each element of dfList; vapply() guarantees
# an integer vector (and fails loudly if an element has no rows attribute).
ns <- vapply(dfList, nrow, integer(1))
# do.call() takes the function first and the list of arguments second.
# The result is coerced to a data frame so that a column can be added with
# `$` afterwards even when the elements are matrices.
out <- as.data.frame(do.call(rbind, dfList))

然后通过以下内容创建Species列:

# Label every row with its run name: each "Rand<species>" name is repeated
# once per row that run contributed (ns).
out$Species <- rep.int(
  paste0("Rand", names(rasterList)),
  ns
)