library(ggplot2)
library(gridExtra)
library(data.table)
样本数据
# Build a reproducible example data set shaped like one input text file.
#
# `days` is a day-of-year lookup for a non-leap reference year (2001) giving
# the calendar month and the half-month ("biweek") index of every day:
# days 1-15 of month m map to 2 * m - 1, the remaining days map to 2 * m.
# Only base R is used so the snippet runs with the packages loaded above
# (the original relied on tibble/lubridate/dplyr without attaching them).
# NOTE(review): `days` is now a plain data.frame rather than a tibble.
ref.dates <- seq(as.Date("2001-01-01"), as.Date("2001-12-31"), by = "day")
ref.lt <- as.POSIXlt(ref.dates)
ref.month <- ref.lt$mon + 1 # POSIXlt months are 0-based
days <- data.frame(
  doy = seq_along(ref.dates),
  month = ref.month,
  biweek = ifelse(ref.lt$mday <= 15, ref.month * 2 - 1, ref.month * 2)
)

# One row per (year, day-of-year); leap days are ignored on purpose so every
# year has exactly 365 rows.
sample.years <- 1980:2015
n.days <- nrow(days)
n.obs <- length(sample.years) * n.days

# The sample() calls are kept in the original order so that a given seed
# yields the same random columns as before.
sample.dat <- data.frame(
  year = rep(sample.years, each = n.days),
  doy = rep(seq_len(n.days), times = length(sample.years)),
  pre = sample(0:50, n.obs, replace = TRUE),
  et0 = sample(0:10, n.obs, replace = TRUE),
  tmax = sample(20:35, n.obs, replace = TRUE),
  tmin = sample(0:18, n.obs, replace = TRUE),
  tmean = sample(15:38, n.obs, replace = TRUE),
  solar = sample(10:14, n.obs, replace = TRUE),
  rh = sample(80:90, n.obs, replace = TRUE),
  u2 = sample(0:2, n.obs, replace = TRUE)
)

# Left-join the calendar lookup on `doy`; match() keeps the row order of
# sample.dat, which merge() would not.
doy.idx <- match(sample.dat$doy, days$doy)
sample.dat$month <- days$month[doy.idx]
sample.dat$biweek <- days$biweek[doy.idx]
我有两个文件夹:new和archive。每个文件夹中各有5569个文本文件,且两个文件夹中的文件名一一对应。
单个文本文件的结构类似于上方的sample.dat
我想从两个文件夹中分别读取同名的文件,并将new与archive中的同一变量相互对比绘制成散点图
# Write one PDF page per input file; each page holds six scatter panels that
# compare a variable from the "new" folder (x axis) with the same variable
# from the "archive" folder (y axis).
#
# `vec.files` is expected to be defined earlier: a character vector of file
# names without the ".txt" extension, present in both folders.

# Build one new-vs-archive scatter panel with identical limits on both axes.
plot_new_vs_old <- function(dat, new_col, old_col) {
  # na.rm: rows of the new file without an archive match carry NA after the
  # left join and must not turn the limits into NA.
  lim <- range(dat[[new_col]], dat[[old_col]], na.rm = TRUE)
  ggplot(dat, aes(x = .data[[new_col]], y = .data[[old_col]])) +
    geom_point(size = 0.02, alpha = 1 / 10) +
    labs(x = new_col, y = old_col) +
    ylim(lim) +
    xlim(lim)
}

# Variables drawn on each page, in panel order.
plot.vars <- c("tmax", "tmin", "tmean", "et0", "rs", "pre")

# Archive columns are prefixed with "old." so they can sit next to the new
# ones after the join; `solar` becomes `old.rs`.
archive.from <- c("pre", "et0", "tmax", "tmin", "solar", "rh", "u2", "tmean")
archive.to <- c("old.pre", "old.et0", "old.tmax", "old.tmin", "old.rs",
                "old.rh", "old.u2", "old.tmean")

pdf(file.path(getwd(), "climate_comparison.pdf"), width = 10, height = 8)
# tryCatch(..., finally =) closes the device even when a file fails to read,
# so a partial PDF is not left locked/open.
tryCatch({
  for (v in seq_along(vec.files)) {
    file.name <- paste0(vec.files[v], ".txt")

    # read the file from the archive folder
    temp.old <- fread(file.path(getwd(), "data", "archive", file.name))
    # read file from the new folder
    temp.new <- fread(file.path(getwd(), "data", "new", file.name))

    setnames(temp.old, old = archive.from, new = archive.to)
    # The panels use `rs` for solar radiation, while the files are described
    # as carrying a `solar` column -- align the name.
    # NOTE(review): confirm the new files really use `solar`.
    if ("solar" %in% names(temp.new) && !("rs" %in% names(temp.new))) {
      setnames(temp.new, "solar", "rs")
    }

    # join the two: left join on every column the tables still share (the
    # same keys an unqualified left_join would pick), keeping the row order
    # of the new file.
    join.keys <- intersect(names(temp.new), names(temp.old))
    temp.full <- merge(temp.new, temp.old, by = join.keys,
                       all.x = TRUE, sort = FALSE)

    panels <- lapply(plot.vars, function(nm) {
      plot_new_vs_old(temp.full, nm, paste0("old.", nm))
    })

    # grid.arrange() draws immediately, starting a new page on the device.
    grid.arrange(grobs = panels, ncol = 3)
  }
}, finally = dev.off())
基本上,我想要一个共5569页的pdf,每页包含6张散点图,分别对比new和archive文件夹中同一文件的6个变量。目前这个过程非常缓慢,我想知道是否有办法让它运行得更快。