我目前正在合并 12 个不同的数据框,每个数据框包含一个 id 列和一个待添加的数值列,各有 48,000 条观测,因此合并结果是一个 48k 条观测 × 14 个变量的数据框。但是,这个过程处理起来耗时太长,我正在寻找一种更快的方法来实现这一目标。
dput:
# January sample: five rows, one value per gridNumber.
# Row names repeat the gridNumber strings, so the id vector is defined once
# and reused for both the column and the row names.
jan <- local({
  grid_ids <- c("17578", "18982", "18983", "18984", "18985")
  data.frame(
    gridNumber = grid_ids,
    PRISM_ppt_stable_4kmM2_193301_bil = c(
      35.7099990844727,
      36,
      35.4199981689453,
      33.7299995422363,
      33.2799987792969
    ),
    row.names = grid_ids,
    # keep gridNumber as character on every R version
    stringsAsFactors = FALSE
  )
})
# February sample: five rows, one value per gridNumber.
# Row names repeat the gridNumber strings, so the id vector is defined once
# and reused for both the column and the row names.
feb <- local({
  grid_ids <- c("17578", "18982", "18983", "18984", "18985")
  data.frame(
    gridNumber = grid_ids,
    PRISM_ppt_stable_4kmM2_193302_bil = c(
      14.6199998855591,
      14.5600004196167,
      14.9899997711182,
      15.4700002670288,
      15.5799999237061
    ),
    row.names = grid_ids,
    # keep gridNumber as character on every R version
    stringsAsFactors = FALSE
  )
})
# March sample: five rows, one value per gridNumber.
# Row names repeat the gridNumber strings, so the id vector is defined once
# and reused for both the column and the row names.
mar <- local({
  grid_ids <- c("17578", "18982", "18983", "18984", "18985")
  data.frame(
    gridNumber = grid_ids,
    PRISM_ppt_stable_4kmM2_193303_bil = c(
      23.8400001525879,
      23.9200000762939,
      24.3400001525879,
      25.7900009155273,
      26.5900001525879
    ),
    row.names = grid_ids,
    # keep gridNumber as character on every R version
    stringsAsFactors = FALSE
  )
})
dplyr代码:
library(dplyr)
# Gather the monthly data frames, then fold them left to right: each step
# full-joins the next month onto the accumulated table on "gridNumber".
datalist <- list(jan, feb, mar)
full <- Reduce(
  f = function(acc, nxt) full_join(acc, nxt, by = "gridNumber"),
  x = datalist
)
在这个示例上,这段代码显然运行得很快,因为观测数很少;但对于完整的数据,有没有更快的方法呢?
答案 0 :(得分:3)
以下是使用 data.table 和 reshape2 的一种方法:
library(data.table)
library(reshape2)
# Convert each monthly data frame to a data.table. as.data.table() works on a
# copy (setDT() would convert by reference), so the by-reference
# set()/setnames() calls below act only on the tables held in month_list.
month_list <- lapply(list(jan, feb, mar), as.data.table)
# Tag every row with the table's original value-column name (column "ID"),
# then give that value column the shared name "value" so the tables stack.
for (i in seq_along(month_list)) {
  value_col <- names(month_list[[i]])[2]
  set(month_list[[i]], j = "ID", value = value_col)
  setnames(month_list[[i]], value_col, "value")
}
# Stack the monthly tables into long form: gridNumber, value, ID.
long_data <- rbindlist(month_list)
# Cast back to wide form: one row per gridNumber, one column per ID.
# value.var names the cell column explicitly; the previous `value =` is not a
# dcast argument, so the value column was only being guessed.
# The data.table:: prefix avoids reshape2::dcast, which masks the data.table
# generic when reshape2 is attached after data.table.
wide <- data.table::dcast(long_data, gridNumber ~ ID, value.var = "value")
答案 1 :(得分:0)
不确定这样会不会更快,但可以试试:
# Give each month's value column the common name PRISM, stack the three
# tables with a "month" label taken from the list names, then pivot to one
# column per month.
# pivot_wider() replaces the superseded spread(). The tidyr:: prefix is used
# because tidyr is not attached in the visible code (only dplyr is).
# Month columns come out in list order (jan, feb, mar) rather than sorted.
list(
  jan = jan %>% rename(PRISM = PRISM_ppt_stable_4kmM2_193301_bil),
  feb = feb %>% rename(PRISM = PRISM_ppt_stable_4kmM2_193302_bil),
  mar = mar %>% rename(PRISM = PRISM_ppt_stable_4kmM2_193303_bil)
) %>%
  bind_rows(.id = "month") %>%
  tidyr::pivot_wider(names_from = month, values_from = PRISM)