在ggplot中绘制大量行

时间:2018-10-15 14:43:50

标签: r ggplot2 data.table gridextra

library(ggplot2)
library(gridExtra)
library(data.table)

样本数据

# Day-of-year lookup table for a non-leap reference year (2001): for every
# day of the year it records the calendar month and a half-month ("biweek")
# index running from 1 to 24.
# Calls are namespace-qualified because only ggplot2, gridExtra and
# data.table are attached above, and data.table and lubridate both export
# `month()` / `mday()`.
days <- tibble::tibble(
  doy = 1:365,
  date = seq(lubridate::ymd("2001-1-1"), lubridate::ymd("2001-12-31"),
             by = "day"),
  month = lubridate::month(date),
  # Days 1-15 belong to the first half of the month, the rest to the second.
  biweek = dplyr::case_when(
    lubridate::mday(date) <= 15 ~ (month * 2) - 1,
    lubridate::mday(date) > 15 ~ month * 2
  )
)
# The date was only needed to derive month/biweek; keep the lookup keyed by doy.
days <- dplyr::select(days, -date)

# Synthetic daily climate table: one row per day for 1980-2015 (365 days per
# year), with randomly drawn integer values for each variable, then enriched
# with the month / biweek columns from `days`.
sample.dat <- local({
  years <- 1980:2015
  n.obs <- length(years) * 365L

  # Draw `n.obs` values with replacement from the given set of integers.
  draw <- function(values) sample(values, n.obs, replace = TRUE)

  # The columns are drawn in the same order as before so that results under a
  # fixed seed are unchanged.
  dat <- data.frame(
    year = rep(years, each = 365),
    doy = rep(1:365, times = length(years)),
    pre = draw(0:50),
    et0 = draw(0:10),
    tmax = draw(20:35),
    tmin = draw(0:18),
    tmean = draw(15:38),
    solar = draw(10:14),
    rh = draw(80:90),
    u2 = draw(0:2)
  )

  # `doy` is the only column shared with `days`; naming it explicitly avoids
  # the implicit-key message and accidental joins on other columns.
  dplyr::left_join(dat, days, by = "doy")
})

我有两个文件夹:new和archive。每个文件夹中各有5569个文本文件。单个文本文件的结构类似于上方的sample.dat。

我想从两个文件夹中读取每一对同名文件,并将两者的对应变量相互对比绘图(new的值为x轴,archive的值为y轴)。

  # Write one PDF page per input file. Each page holds six scatter plots that
  # compare a climate variable from the "new" folder (x axis) with the same
  # variable from the "archive" folder (y axis).
  # NOTE(review): `vec.files` is not defined in this snippet; it is assumed to
  # be a character vector of file names without the ".txt" extension.

  # Names: column in the new file; values: the matching archive column after
  # it has been renamed with the "old." prefix. The archive `solar` column is
  # renamed to `old.rs`, so it is paired with `solar` from the new file.
  compare.vars <- c(tmax = "old.tmax", tmin = "old.tmin", tmean = "old.tmean",
                    et0 = "old.et0", solar = "old.rs", pre = "old.pre")

  # Scatter plot of `new.var` (x) against `old.var` (y) from `dat`, using the
  # same limits on both axes. Limits are taken from the finite values only,
  # because the left join can leave NA in the archive columns.
  plot_new_vs_old <- function(dat, new.var, old.var) {
    vals <- c(dat[[new.var]], dat[[old.var]])
    vals <- vals[is.finite(vals)]

    p <- ggplot(dat, aes(x = .data[[new.var]], y = .data[[old.var]])) +
      geom_point(size = 0.02, alpha = 1 / 10) +
      labs(x = new.var, y = old.var)

    # With no finite values there is nothing to derive limits from; let
    # ggplot fall back to its defaults instead of failing.
    if (length(vals) > 0) {
      lims <- range(vals)
      p <- p + xlim(lims) + ylim(lims)
    }
    p
  }

  pdf(paste0(getwd(),"/climate_comparison.pdf"), width = 10, height = 8)

  # `finally` closes the PDF device even when a file fails to read or plot,
  # so the device is never left open after an error.
  tryCatch({
    for (file.name in vec.files) {

      # read the file from the archive folder
      temp.archive <- fread(paste0(getwd(),"/data/archive/",file.name,".txt"))

      # read file from the new folder
      temp.new <- fread(paste0(getwd(),"/data/new/",file.name,".txt"))

      # Prefix the archive measurements so they do not collide with the new
      # ones; rh and u2 are renamed too so they are not used as join keys.
      temp.old <- dplyr::rename(temp.archive,
                                old.pre = pre, old.et0 = et0, old.tmax = tmax,
                                old.tmin = tmin, old.rs = solar, old.rh = rh,
                                old.u2 = u2, old.tmean = tmean)

      # Join on every column the two tables still share (the same keys a
      # natural join would pick), stated explicitly to avoid a message per file.
      join.keys <- intersect(names(temp.new), names(temp.old))
      temp.full <- dplyr::left_join(temp.new, temp.old, by = join.keys)

      page.plots <- lapply(names(compare.vars), function(new.var) {
        plot_new_vs_old(temp.full, new.var, compare.vars[[new.var]])
      })

      # grid.arrange() draws on the open device, starting a new page each time.
      grid.arrange(grobs = page.plots, ncol = 3)
    }
  }, finally = dev.off())

基本上,我想要一个包含5569页的pdf,每页包含6张散点图,分别将new和archive文件夹中同一变量的值相互对比绘制。这个过程非常缓慢,我想知道是否有人有办法让它更快。

0 个答案:

没有答案