在 R 的 ggplot 图中使用 for 循环遍历多列来创建标题

时间:2018-09-19 14:38:26

标签: r loops ggplot2 iteration

我对在 ggplot 中使用循环和函数还很陌生。我写了一个函数来遍历数据框中的“HUC14”列:对于 HUC14 的每个唯一值,它会创建一个子集数据框用于 ggplot 绘图,并把该唯一值用作标题。不过,我想知道能否同时遍历数据框中的另一列(名称列 MonLocName),把它和 HUC14 编号一起加到图标题中?我目前的代码不会改变图形或 HUC14,只会循环更换名称。不知道我哪里做错了!我希望标题中的 HUC14 和名称与所绘制的两个参数的取值相对应!

样本数据:

# Sample data for the question, given as a structure() literal (looks like
# dput() output): 15 rows (row.names = c(NA, -15L)) of class data.table /
# data.frame with the 14 columns named in .Names.
# NOTE(review): the final `.internal.selfref = <pointer: ...>` attribute is not
# parseable R; it has to be removed before this expression can be evaluated.
structure(list(stdate = structure(c(11359, 16498, 12149, 12437, 
13277, 17536, 16517, 16503, 16134, 16105, 15783, 16470, 14266, 
13566, 14984), class = "Date"), orgid = c("USGS-NJ", "USGS-NJ", 
"USGS-NJ", "21NJDEP1", "21NJDEP1", "USGS-NJ", "NJDEP_BFBM", "NJDEP_BFBM", 
"NJDEP_BFBM", "USGS-NJ", "NJDEP_BFBM", "USGS-NJ", "21NJDEP1", 
"GSWA", "NJDEP_BFBM"), locid = c("USGS-01396030", "USGS-01378560", 
"USGS-01393400", "21NJDEP1-01396030", "21NJDEP1-AN0770", "USGS-01378560", 
"NJDEP_BFBM-01394180", "NJDEP_BFBM-AN0425A", "NJDEP_BFBM-01394180", 
"USGS-01378560", "NJDEP_BFBM-01394180", "USGS-01394500", "21NJDEP1-01379525", 
"GSWA-LB4S", "NJDEP_BFBM-01379525"), sttime = structure(c(34200, 
50400, 80280, 35700, 0, NA, 41400, 45300, 39600, 46800, 40500, 
42300, 34800, 42900, 37380), class = c("hms", "difftime"), units = "secs"), 
    valunit = c("uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", 
    "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", 
    "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", 
    "uS/cm @25C"), swqs = c("FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", 
    "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", 
    "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT"), WMA = c(7L, 
    5L, 7L, 7L, 16L, NA, 7L, 9L, 7L, 5L, 7L, 7L, 6L, 6L, 6L), 
    year = c(2001L, 2015L, 2003L, 2004L, 2006L, NA, 2015L, 2015L, 
    2014L, 2014L, 2013L, 2015L, 2009L, 2007L, 2011L), locid2 = c("01396030", 
    "01378560", "01393400", "01396030", "AN0770", "01378560", 
    "01394180", "AN0425A", "01394180", "01378560", "01394180", 
    "01394500", "01379525", "LB4S", "01379525"), HUC14 = c("HUC02030104050090", 
    "HUC02030103180010", "HUC02030104020020", "HUC02030104050090", 
    "HUC02040206230040", "HUC02030103180010", "HUC02030104050040", 
    "HUC02030105120120", "HUC02030104050040", "HUC02030103180010", 
    "HUC02030104050040", "HUC02030104050040", "HUC02030103010190", 
    "HUC02030103010040", "HUC02030103010140"), MonLocName = c("Rahway R S Br in Merrill Park off Fairview Rd in Woodbridge", 
    "Coles Bk at Hackensack", "Elizabeth R at Hillside", "Rahway R S Br in Merrill Park off Fairview Rd in Woodbridge", 
    "Green Ck on Rt 47 in Middle Twp", "Coles Bk at Hackensack", 
    "Rahway R trib at Springfield", "Ambrose Bk at Behmer Rd in Piscataway", 
    "Rahway R trib at Springfield", "Coles Bk at Hackensack", 
    "Rahway R trib at Springfield", "Rahway R near Springfield", 
    "Canoe Bk on Parsonage Hill Rd in Millburn Twp", "Loantaka Bk at Woodland Ave (upstream)", 
    "Canoe Bk on Parsonage Hill Rd in Millburn Twp"), Chloride = structure(c(903, 
    2100, NA, 1409.3, 151, NA, 1340, 52.062, 1170, 1020, 1240, 
    1030, 1220, 209, 1040), na.action = structure(c(1L, 2L, 3L, 
    4L, 7L, 8L, 9L, 10L), class = "omit")), Specific_conductance = structure(c(7450, 
    7190, 6080, 5550, 4680, 4490, 4250, 4090, 3890, 3710, 3710, 
    3580, 3570, 3570, 3380), na.action = structure(5:10, class = "omit")), 
    tds = structure(c(1620, 3630, NA, 3056, 606, NA, 2530, 141, 
    2590, 1840, 2050, 1970, 57, 604, 1870), na.action = structure(1:6, class = "omit"))), .Names = c("stdate", 
"orgid", "locid", "sttime", "valunit", "swqs", "WMA", "year", 
"locid2", "HUC14", "MonLocName", "Chloride", "Specific_conductance", 
"tds"), class = c("data.table", "data.frame"), row.names = c(NA, 
-15L), .internal.selfref = <pointer: 0x00000000028f0788>)

我正在使用的代码:

# Prints one scatter plot (points plus a linear fit) for every combination of
# a unique HUC14 value and a unique MonLocName value.
#   df   - accepted but never used in the body; all data is read from `df2`,
#          which must exist in an enclosing environment
#   x, y - bare column names, captured with enquo() and unquoted in aes()
# NOTE(review): the name loop is nested inside the HUC loop and the plotted
# subset depends only on `i`, so the same HUC14 data is printed once per name
# and the title pairs each HUC14 with every name, matching or not.
corr_plots<-function(df,x,y){

  # create list of HUCs in data to loop over 
  HUC_list <- unique(df2$HUC14)
  # unique names are collected independently of HUC14, so position j in
  # name_list has no relation to position i in HUC_list
  name_list<-unique(df2$MonLocName)

  for (i in seq_along(HUC_list)) { 
    for(j in seq_along(name_list)){
        # capture the column arguments as quosures (repeated on every
        # iteration although the result does not depend on i or j)
        x_var <- enquo(x)
        y_var <- enquo(y)

      # data: only the rows of df2 for the i-th HUC14; j is not used here
      plot<-ggplot(subset(df2, df2$HUC14==HUC_list[i]),
             aes(x = !!x_var, y = !!y_var))+
      geom_point(size=2,alpha=0.5)+
      geom_smooth(method = "lm", se = FALSE)+ 

      # fixed axis ranges with no padding
      scale_x_continuous(limits = c(0,6200), expand = c(0, 0)) +
      scale_y_continuous(limits = c(0,2000), expand = c(0, 0)) +
        # title: i-th HUC14 followed by the j-th name
        ggtitle(paste(HUC_list[i],as.character(name_list[j])))


      # explicit print() is required to draw a ggplot inside a loop
      print(plot)

    }
  }
}

无循环的工作示例:

# Scatter plot (points plus a linear fit) of `y` against `x` for the rows of
# `df` whose HUC14 column equals `HUC`.
#   df   - data frame with a HUC14 column and the columns passed as x and y
#   HUC  - HUC14 value selecting the rows to plot
#   x, y - bare (unquoted) column names
# Returns the ggplot object; axis limits are fixed to 0-6200 (x) and
# 0-2000 (y) with no expansion.
corr_plots <- function(df, HUC, x, y) {
  x_quo <- enquo(x)
  y_quo <- enquo(y)

  # Keep only the rows belonging to the requested HUC14.
  huc_rows <- subset(df, HUC14 == HUC)

  scatter <- ggplot(huc_rows, aes(x = !!x_quo, y = !!y_quo)) +
    geom_point(size = 2, alpha = 0.5) +
    geom_smooth(method = "lm", se = FALSE)

  scatter +
    scale_x_continuous(limits = c(0, 6200), expand = c(0, 0)) +
    scale_y_continuous(limits = c(0, 2000), expand = c(0, 0))
}

corr_plots(df2, "HUC02030104020020", Specific_conductance, Chloride)

1 个答案:

答案 0 :(得分:2)

您可以通过多种方式执行此操作,我发现tidyverse / purrr方法在灵活性和简洁性之间取得了很好的平衡:

library(tidyverse)

# Scatter plot (points plus a linear fit) of `y` against `x` for all rows of
# `df`, with a caller-supplied title.
#   df    - data frame containing the columns passed as x and y
#   x, y  - bare (unquoted) column names
#   title - plot title
# The subtitle is built from the two column expressions as "<x> vs. <y>".
# Returns the ggplot object; axis limits are fixed to 0-6200 (x) and
# 0-2000 (y) with no expansion.
corr_plot <- function(df, x, y, title) {
  x_quo <- enquo(x)
  y_quo <- enquo(y)
  axes_label <- paste(rlang::quo_text(x_quo), "vs.", rlang::quo_text(y_quo))

  scatter <- ggplot(df, aes(x = !!x_quo, y = !!y_quo)) +
    geom_point(size = 2) +
    geom_smooth(method = "lm", se = FALSE)

  scatter +
    scale_x_continuous(limits = c(0, 6200), expand = c(0, 0)) +
    scale_y_continuous(limits = c(0, 2000), expand = c(0, 0)) +
    labs(title = title, subtitle = axes_label)
}

# Split the question's data frame into one nested tibble per HUC14 value.
# `df2` is the data frame name used in the question; no object called `df` is
# created in this snippet, so a bare `df` would resolve to the stats::df()
# function and group_by() would fail.
nested_by_HUC14 <-
  df2 %>%
  group_by(HUC14) %>%
  nest() %>%
  # newer tidyr keeps the HUC14 grouping after nest(); drop it explicitly
  ungroup()

# Build one plot per HUC14 (titled with the HUC14 value), then print each one.
# pull() extracts the list-column so walk() receives the plots themselves and
# `print` as the function to apply. The earlier `walk(print(.$plot))` instead
# evaluated print() once on the whole list and passed its result to walk() as
# `.f`, while walk() iterated over the tibble's columns.
nested_by_HUC14 %>%
  mutate(
    plot = map2(data, HUC14, ~ corr_plot(.x, Specific_conductance, Chloride, .y))
  ) %>%
  pull(plot) %>%
  walk(print)

说明:`walk(print(.$plot))` 会先把整个图形列表打印一次,因此控制台会输出 [[1]]…[[2]]… 等列表索引;如果不希望出现这些输出,请使用 `pull(plot) %>% walk(print)` 逐个打印图形。