我真的需要学会正确的做法,因为长期以来我一直在用一种极其痛苦的笨办法来做这件事。我有一组数据框,想对其中的每一个执行相同的操作(添加 SCI_NAME 列、添加以栖息地名称命名的标志列、只保留这两列):
自动执行这些操作的最佳方法是什么?我不想为每个数据框都重复写一遍相同的代码。对我来说,一个棘手的地方是:在对 data.table 列表进行操作时,如何访问 habitat_names 向量中对应的名称。这是我现有的代码:
# Load each IUCN habitat export into its own data.table.
iucn_1_4 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.4 Temperate Forest.csv"))
iucn_1_6 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.6 Subtropical_Tropical Moist Lowland Forest.csv"))
iucn_1_7 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.7 Subtropical_Tropical Mangrove Forest Vegetation Above High Tide Level.csv"))
# NOTE(review): this table is named iucn_1_8 and is flagged below with
# habitat_names[4] ("1.8 ... Swamp Forest"), but the file read here is the
# "1.9 ... Moist Montane Forest" export -- confirm which habitat is intended.
iucn_1_8 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.9 Subtropical_Tropical Moist Montane Forest.csv"))
iucn_4_4 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/4.4 Temperate Grassland.csv"))
iucn_4_5 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/4.5 Subtropical_Tropical Dry Lowland Grassland_Colombia_Venezuela and Bolivia.csv"))

# Names of the per-habitat flag columns, in the same order as the tables above.
habitat_names <- c("1.4 Temperate Forest","1.6 Subtropical/Tropical Moist Lowland Forest","1.7 Subtropical/Tropical Mangrove Forest Vegetation Above High Tide Level","1.8 Subtropical/Tropical Swamp Forest","4.4 Temperate Grassland","4.5 Subtropical/Tropical Dry Lowland Grassland")

# For every table: build SCI_NAME as "Genus Species", add a TRUE flag column
# named after the habitat, and keep only those two columns in the *.out table.
# The separator is passed as `sep`; as a bare third argument, " " would be
# pasted on as an extra string and leave trailing blanks in SCI_NAME.
iucn_1_4[, SCI_NAME := paste(Genus, Species, sep = " ")]
iucn_1_4[, (habitat_names[1]) := TRUE]
iucn_1_4.out <- iucn_1_4[, c("SCI_NAME", habitat_names[1]), with = FALSE]

iucn_1_6[, SCI_NAME := paste(Genus, Species, sep = " ")]
iucn_1_6[, (habitat_names[2]) := TRUE]
iucn_1_6.out <- iucn_1_6[, c("SCI_NAME", habitat_names[2]), with = FALSE]

iucn_1_7[, SCI_NAME := paste(Genus, Species, sep = " ")]
iucn_1_7[, (habitat_names[3]) := TRUE]
iucn_1_7.out <- iucn_1_7[, c("SCI_NAME", habitat_names[3]), with = FALSE]

iucn_1_8[, SCI_NAME := paste(Genus, Species, sep = " ")]
iucn_1_8[, (habitat_names[4]) := TRUE]
iucn_1_8.out <- iucn_1_8[, c("SCI_NAME", habitat_names[4]), with = FALSE]

iucn_4_4[, SCI_NAME := paste(Genus, Species, sep = " ")]
iucn_4_4[, (habitat_names[5]) := TRUE]
iucn_4_4.out <- iucn_4_4[, c("SCI_NAME", habitat_names[5]), with = FALSE]

iucn_4_5[, SCI_NAME := paste(Genus, Species, sep = " ")]
iucn_4_5[, (habitat_names[6]) := TRUE]
iucn_4_5.out <- iucn_4_5[, c("SCI_NAME", habitat_names[6]), with = FALSE]
答案 0 :(得分:4)
您也可以使用Map来避免for循环。示例代码
library(data.table)

# Two toy tables standing in for the real inputs (e.g. iucn_1_4, iucn_1_6).
dt1 <- data.table(v1 = 1:10, v2 = c("a", "b"))
dt2 <- data.table(v1 = 1:10, v2 = c("a", "b", "c", "d", "e"))
# One flag-column name per table; use habitat_names for the real data.
names <- c("name1", "name2")

# Add SCI_NAME and a TRUE flag column called `name` to `dt` (both added by
# reference via `:=`), then return a table holding only those two columns.
f <- function(dt, name) {
  keep <- c("SCI_NAME", name)
  dt[, SCI_NAME := paste0(v1, v2)] # use Genus, Species on the real data
  dt[, (name) := TRUE]
  dt[, .SD, .SDcols = keep]
}

# Pair each table with its name and collect the reduced tables in a list.
res <- Map(f, list(dt1, dt2), names)
然后你可以对 res 调用 join_all(如果我没有理解错你的目的的话)。
答案 1 :(得分:1)
# Paths of the CSV exports, listed in the same order as habitat_names.
files <- c("file1.csv", "file2.csv") # ...extend with the remaining files
DT.list <- lapply(files, fread)

# Preallocate the result list instead of growing it inside the loop.
DT.out <- vector("list", length(DT.list))
# seq_along() is safe for any list length; seq() on a one-element list is not.
for (i in seq_along(DT.list)) {
  flag_col <- habitat_names[i]
  # Pass the separator as `sep`; as a bare third argument, " " would be pasted
  # on as an extra string and leave trailing blanks in SCI_NAME.
  DT.list[[i]][, SCI_NAME := paste(Genus, Species, sep = " ")]
  DT.list[[i]][, (flag_col) := TRUE]
  DT.out[[i]] <- DT.list[[i]][, c("SCI_NAME", flag_col), with = FALSE]
}
然后,
# Stack the tables. Each one carries a differently named flag column, so match
# columns by name and fill the flags a table lacks with NA rather than binding
# by position.
rbindlist(DT.out, use.names = TRUE, fill = TRUE)
或者
# merge() joins exactly two tables, so fold it pairwise over the whole list
# (do.call would pass a third table as the `by` argument). Add all = TRUE to
# keep species that are missing from some of the tables.
Reduce(function(x, y) merge(x, y, by = "SCI_NAME"), DT.out)
有关详细信息,请参阅Data.table meta-programming问题
答案 2 :(得分:0)
以下代码未经测试,大概可以这样写:
# File names of the IUCN habitat exports to load.
namestoread <- c(
  "1.4 Temperate Forest.csv",
  "1.6 Subtropical_Tropical Moist Lowland Forest.csv",
  "1.7 Subtropical_Tropical Mangrove Forest Vegetation Above High Tide Level.csv",
  "1.9 Subtropical_Tropical Moist Montane Forest.csv",
  "4.4 Temperate Grassland.csv",
  "4.5 Subtropical_Tropical Dry Lowland Grassland_Colombia_Venezuela and Bolivia.csv"
)

# Read every export, add SCI_NAME plus a TRUE flag column named after the file,
# and collect the tables in a list (one element per entry of namestoread).
listofdatasets <- lapply(namestoread, function(fname) {
  # `:=` only works on a data.table, so convert the data.frame that
  # read.csv() returns before adding columns.
  dt <- data.table(read.csv(paste0("~/Projects/ClimateChange/Random/IUCN/", fname)))
  # Pass the separator as `sep`; as a bare third argument, " " would be pasted
  # on as an extra string and leave trailing blanks in SCI_NAME.
  dt[, SCI_NAME := paste(Genus, Species, sep = " ")]
  dt[, (fname) := TRUE]
  dt
})

# NOTE(review): join_all() is not provided by base R or data.table; presumably
# plyr::join_all is meant -- confirm that plyr is loaded before running this.
join_all(listofdatasets)
答案 3 :(得分:0)
R的良好做法是,如果你不止一次做某事,那么写一个函数来处理重复的元素:
例如:
# Habitat names; each one becomes the name of a flag column.
habitat_names <- c(
  "1.4 Temperate Forest",
  "1.6 Subtropical/Tropical Moist Lowland Forest",
  "1.7 Subtropical/Tropical Mangrove Forest Vegetation Above High Tide Level"
  # etc. -- add the remaining names here; do not leave a trailing comma,
  # because c() rejects an empty final argument.
)
# Read one habitat CSV and return its species flagged for that habitat.
#
# path:  path to the CSV file; it must contain Genus and Species columns.
# index: position within `Names` of the habitat this file represents.
# Names: character vector of habitat names.
#
# Returns a data.table with two columns: SCI_NAME ("Genus Species") and a
# column named Names[index] that is TRUE on every row.
getSpecies <- function(path, index, Names) {
  flag_col <- Names[index]
  data <- data.table(read.csv(path))
  # Pass the separator as `sep`; as a bare third argument, " " would be pasted
  # on as an extra string and leave trailing blanks in SCI_NAME.
  data[, SCI_NAME := paste(Genus, Species, sep = " ")]
  data[, (flag_col) := TRUE]
  data[, c("SCI_NAME", flag_col), with = FALSE]
}
# Call the function for the first habitat. The argument must be spelled
# `Names` (R is case-sensitive), and the path matches the file name on disk.
iucn_1_4 <- getSpecies(
  path = "~/Projects/ClimateChange/Random/IUCN/1.4 Temperate Forest.csv",
  index = 1,
  Names = habitat_names
)