我有一个for loop
,它在每次迭代后产生一个数据帧。我想将所有数据框附加在一起,但发现它很困难。以下就是我
我正在尝试,请建议如何解决它:
d = NULL
for (i in 1:7) {
# vector output
model <- #some processing
# add vector to a dataframe
df <- data.frame(model)
}
df_total <- rbind(d,df)
答案 0 :(得分:94)
不要在循环内完成。制作一个列表,然后将它们组合在循环之外。
datalist = list()
for (i in 1:5) {
# ... make some data
dat <- data.frame(x = rnorm(10), y = runif(10))
dat$i <- i # maybe you want to keep track of which iteration produced it?
datalist[[i]] <- dat # add it to your list
}
big_data = do.call(rbind, datalist)
# or big_data <- dplyr::bind_rows(datalist)
# or big_data <- data.table::rbindlist(datalist)
这是一种更像R的做事方式。它也可以大大加快,特别是如果您使用dplyr::bind_rows
或data.table::rbindlist
进行数据帧的最终组合。
答案 1 :(得分:5)
试试这个:
df_total = data.frame()
for (i in 1:7){
# vector output
model <- #some processing
# add vector to a dataframe
df <- data.frame(model)
df_total <- rbind(df_total,df)
}
答案 2 :(得分:2)
再次maRtin是正确的,但要实现这一点,您必须从已经拥有至少一列的数据框开始
model <- #some processing
df <- data.frame(col1=model)
for (i in 2:17)
{
model <- # some processing
nextcol <- data.frame(model)
colnames(nextcol) <- c(paste("col", i, sep="")) # rename the comlum
df <- cbind(df, nextcol)
}
答案 3 :(得分:2)
在Coursera课程中,R编程简介,测试了这项技能。 他们给所有学生332个单独的csv文件,并要求他们以编程方式组合几个文件来计算污染物的平均值。
这是我的解决方案:
# create your empty dataframe so you can append to it.
combined_df <- data.frame(Date=as.Date(character()),
Sulfate=double(),
Nitrate=double(),
ID=integer())
# for loop for the range of documents to combine
for(i in min(id): max(id)) {
# using sprintf to add on leading zeros as the file names had leading zeros
read <- read.csv(paste(getwd(),"/",directory, "/",sprintf("%03d", i),".csv", sep=""))
# in your loop, add the files that you read to the combined_df
combined_df <- rbind(combined_df, read)
}
答案 4 :(得分:1)
尝试使用rbindlist
而不是rbind
,因为它非常非常快。
示例:
library(data.table)
##### example 1: slow processing ######
table.1 <- data.frame(x = NA, y = NA)
time.taken <- 0
for( i in 1:100) {
start.time = Sys.time()
x <- rnorm(100)
y <- x/2 +x/3
z <- cbind.data.frame(x = x, y = y)
table.1 <- rbind(table.1, z)
end.time <- Sys.time()
time.taken <- (end.time - start.time) + time.taken
}
print(time.taken)
> Time difference of 0.1637917 secs
####example 2: faster processing #####
table.2 <- list()
t0 <- 0
for( i in 1:100) {
s0 = Sys.time()
x <- rnorm(100)
y <- x/2 + x/3
z <- cbind.data.frame(x = x, y = y)
table.2[[i]] <- z
e0 <- Sys.time()
t0 <- (e0 - s0) + t0
}
s1 = Sys.time()
table.3 <- rbindlist(table.2)
e1 = Sys.time()
t1 <- (e1-s1) + t0
t1
> Time difference of 0.03064394 secs
答案 5 :(得分:1)
以下一些tidyverse
和自定义功能选项可能会根据您的需要起作用:
library(tidyverse)
# custom function to generate, filter, and mutate the data:
combine_dfs <- function(i){
data_frame(x = rnorm(5), y = runif(5)) %>%
filter(x < y) %>%
mutate(x_plus_y = x + y) %>%
mutate(i = i)
}
df <- 1:5 %>% map_df(~combine_dfs(.))
df <- map_df(1:5, ~combine_dfs(.)) # both give the same results
> df %>% head()
# A tibble: 6 x 4
x y x_plus_y i
<dbl> <dbl> <dbl> <int>
1 -0.973 0.673 -0.300 1
2 -0.553 0.0463 -0.507 1
3 0.250 0.716 0.967 2
4 -0.745 0.0640 -0.681 2
5 -0.736 0.228 -0.508 2
6 -0.365 0.496 0.131 3
如果您有一个需要合并的文件目录,则可以执行类似的操作:
dir_path <- '/path/to/data/test_directory/'
list.files(dir_path)
combine_files <- function(path, file){
read_csv(paste0(path, file)) %>%
filter(a < b) %>%
mutate(a_plus_b = a + b) %>%
mutate(file_name = file)
}
df <- list.files(dir_path, '\\.csv$') %>%
map_df(~combine_files(dir_path, .))
# or if you have Excel files, using the readxl package:
combine_xl_files <- function(path, file){
readxl::read_xlsx(paste0(path, file)) %>%
filter(a < b) %>%
mutate(a_plus_b = a + b) %>%
mutate(file_name = file)
}
df <- list.files(dir_path, '\\.xlsx$') %>%
map_df(~combine_xl_files(dir_path, .))
答案 6 :(得分:1)
对我来说,它非常简单。首先,我创建了一个空的data.frame
,然后在每次迭代中都向其中添加了一列。这是我的代码:
df <- data.frame(modelForOneIteration)
for(i in 1:10){
model <- # some processing
df[,i] = model
}
答案 7 :(得分:-1)
x <- c(1:10)
# empty data frame with variables ----
df <- data.frame(x1=character(),
y1=character())
for (i in x) {
a1 <- c(x1 == paste0("The number is ",x[i]),y1 == paste0("This is another number ", x[i]))
df <- rbind(df,a1)
}
names(df) <- c("st_column","nd_column")
View(df)
那可能是个好方法。...