带有条件子集的ggplot2循环图

时间:2016-09-21 23:37:24

标签: r loops ggplot2 subset apply

数据描述:

我有一个长格式的数据集,有多个不同的分组变量(在数据示例中:StandID和simID)

我要做的是:

我需要按分组变量的每个唯一取值,基于此数据集为多列创建简单的散点图(x = 预测值,y = 观测值)。

我正在尝试使用标准图的一个例子是

# rows for the "OBS_OBS_OBS" simID
obs <- subset(example, simID == "OBS_OBS_OBS")
# rows for one other simID, "CS_F_NW"
csfnw <- example[example$simID == "CS_F_NW", ]

# base-graphics scatter plot of the X1HR column of the two subsets
plot(x = obs$X1HR, y = csfnw$X1HR)

我需要为所有simID和第9-14列执行此操作。 (数据示例总共12个图表)

我尝试了什么:

我遇到的问题是y轴需要保持不变,同时循环通过x轴的不同子集。

我得先承认,我不知道最好的方法是什么……我原以为这会很容易,因为数据已经是长格式,我只需要指向数据的某个子集即可。

1)我最初的方法是尝试拆分数据,使每个simID都有自己的数据框,然后将其与观测数据的数据框进行比较,但我不知道如何把它传递给ggplot。

2)我的第二个想法是编写某种makeGraph函数,其中包含我想要的所有图形属性(aesthetics),再用某种apply函数把所有内容传给该函数,但我没能让它正常工作。

 # Scatter plot of two columns of `dat`, selected by name.
 #   dat:  data frame holding the columns named by `x` and `y`, plus `Treat`
 #   x, y: column names given as character strings, e.g. "X1HR"
 # Returns a ggplot object.
 makePlot <- function(dat, x, y) {
   # .data[[name]] looks the column up by its string name; a bare aes(x = x)
   # would search `dat` for a column literally called "x"
   ggplot(data = dat, aes(x = .data[[x]], y = .data[[y]])) +
     # shape has to be mapped inside aes() to be taken from `dat`
     geom_point(aes(shape = .data[["Treat"]])) +
     theme_bw()
 }

我目前能做到的,只是把数据框拆解成若干变量向量,以便之后传给某种循环/apply:

# simIDs that actually occur in the data; droplevels() discards factor
# levels without any rows, which would otherwise produce empty subsets
sims <- levels(droplevels(example$simID))
# every simID except "OBS_OBS_OBS"
sims2 <- setdiff(sims, "OBS_OBS_OBS")
# names of columns 9 to 14, the variables to be plotted
fuel_classes <- colnames(example)[9:14]

谢谢

数据示例:

    example=structure(list(Year = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L), .Label = c("2001", "2002", "2003", "2004", "2005", 
"2013", "2014", "2015"), class = "factor"), StandID = structure(c(10L, 
2L, 6L, 22L, 14L, 18L, 34L, 26L, 30L, 10L, 2L, 6L, 22L, 14L, 
18L, 34L, 26L, 30L, 10L, 2L, 6L, 22L, 14L, 18L, 34L, 26L, 30L
), .Label = c("1NB", "1NC", "1NT", "1NTB", "1RB", "1RC", "1RT", 
"1RTB", "1SB", "1SC", "1ST", "1STB", "2NB", "2NC", "2NT", "2NTB", 
"2RB", "2RC", "2RT", "2RTB", "2SB", "2SC", "2ST", "2STB", "3NB", 
"3NC", "3NT", "3NTB", "3RB", "3RC", "3RT", "3RTB", "3SB", "3SC", 
"3ST", "3STB"), class = "factor"), Block = structure(c(1L, 1L, 
1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("1", "2", "3"
), class = "factor"), Aspect = structure(c(3L, 1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L), .Label = c("N", "R", "S"), class = "factor"), 
Treat = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("B", "C", "T", "TB"), class = "factor"), 
Variant = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("CS", "OBS", "SN"), class = "factor"), 
Fuels = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("F", "NF", "OBS"), class = "factor"), 
Weather = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("NW", "OBS", "W"), class = "factor"), 
X1HR = c(0.321666667, 0.177777778, 0.216111111, 0.280555556, 
0.255555556, 0.251666667, 0.296666667, 0.231111111, 0.22, 
0.27556628, 0.298042506, 0.440185249, 0.36150676, 0.398630172, 
0.367523015, 0.345717251, 0.349305987, 0.412227929, 0.242860824, 
0.258737177, 0.394024998, 0.287317872, 0.321927488, 0.281322986, 
0.313588411, 0.303123146, 0.383658946), X10HR = c(0.440555556, 
0.32, 0.266666667, 0.292222222, 0.496666667, 0.334444444, 
0.564444444, 0.424444444, 0.432777778, 0.775042951, 0.832148314, 
1.08174026, 1.023838878, 0.976997674, 0.844206274, 0.929837704, 
1.0527215, 1.089246511, 0.88642776, 0.920596302, 1.209707737, 
1.083737493, 1.077612877, 0.92481339, 1.041637182, 1.149550319, 
1.229776621), X100HR = c(0.953888889, 1.379444444, 0.881666667, 
1.640555556, 2.321666667, 1.122222222, 1.907777778, 1.633888889, 
1.208333333, 1.832724094, 2.149356842, 2.364475727, 2.493232965, 
2.262988567, 1.903909683, 2.135747433, 2.256677628, 2.288722038, 
1.997704744, 2.087135553, 2.524872541, 2.34671092, 2.338253498, 
2.06796217, 2.176314831, 2.580271006, 2.857197046), X1000HR = c(4.766666667, 
8.342222222, 3.803333333, 8.057777778, 10.11444444, 6.931111111, 
6.980555556, 13.20611111, 1.853333333, 3.389177084, 4.915714741, 
2.795267582, 2.48227787, 2.218413353, 1.64684248, 2.716156483, 
2.913746119, 2.238629341, 3.449863434, 3.432626724, 3.617531776, 
3.641639471, 3.453454971, 3.176793337, 3.459602833, 3.871166945, 
2.683447838), LITTER = c(2.4, 2.219444444, 2.772222222, 2.596666667, 
2.693888889, 2.226111111, 2.552222222, 3.109444444, 2.963333333, 
2.882233381, 3.025934696, 3.174396992, 3.291081667, 2.897673607, 
2.737119675, 2.987895727, 3.679605484, 2.769756079, 2.882241249, 
3.02594161, 3.174404144, 3.291091681, 2.897681713, 2.737129688, 
2.987901449, 3.679611444, 2.769766569), DUFF = c(1.483333333, 
1.723888889, 0.901666667, 1.520555556, 1.49, 1.366111111, 
0.551666667, 1.056111111, 0.786111111, 2.034614563, 2.349547148, 
1.685223818, 2.301301956, 2.609308243, 2.21895647, 2.043699026, 
2.142618418, 0.953421116, 4.968493462, 4.990526676, 5.012362003, 
5.023665905, 4.974074364, 4.947199821, 4.976779461, 5.082509995, 
3.55211544), simID = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("CS_F_NW", "CS_F_W", 
"CS_NF_NW", "CS_NF_W", "OBS_OBS_OBS", "SN_F_NW", "SN_F_W", 
"SN_NF_NW", "SN_NF_W"), class = "factor")), .Names = c("Year", 
"StandID", "Block", "Aspect", "Treat", "Variant", "Fuels", "Weather", 
"X1HR", "X10HR", "X100HR", "X1000HR", "LITTER", "DUFF", "simID"
), row.names = c(37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 
82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 127L, 128L, 129L, 
130L, 131L, 132L, 133L, 134L, 135L), class = "data.frame")

1 个答案:

答案 0 :(得分:0)

你实际上是在正确的轨道上。如果所有绘图都相同,只需创建一个函数,然后使用循环遍历子集。对于您的示例,可以这样做:

library(ggplot2)
# Build one titled scatter plot.
#   dat:   data frame with columns `x` and `y`
#   title: plot title
# Returns a ggplot object.
plotFun <- function(dat, title) {
  canvas <- ggplot(data = dat)
  # mapping stays on the point layer; shape 18 is a fixed (unmapped) setting
  points <- geom_point(aes(x = x, y = y), shape = 18)
  canvas + points + ggtitle(title) + theme_bw()
}
# columns of interest
colIdx <- 9:14
colNames <- names(example)[colIdx]
# split on all values of simID; drop = TRUE discards factor levels that
# never appear in the data, so no empty data frames end up in the list
dfList <- split(example, example$simID, drop = TRUE)
nSets <- length(dfList)
# preallocate a list-array for saving plots, indexed as
# [subset on x, subset on y, variable]
plotList <- array(vector("list", nSets * nSets * length(colIdx)),
                  dim = c(nSets, nSets, length(colIdx)),
                  dimnames = list(names(dfList), names(dfList), colNames))
# the first two loops loop over all unique combinations of dfList;
# seq_len() leaves them empty when there are fewer than two subsets,
# whereas 2:length(dfList) would count down from 2 to 1
for (i in seq_len(nSets)[-1]) {
  for (j in seq_len(i - 1)) {
    # loop over target variables
    for (k in seq_along(colIdx)) {
      # store variables to plot in a temporary dataframe
      # NOTE(review): rows are paired by position, which assumes both
      # subsets hold the same stands in the same order - confirm
      tempDf <- data.frame(x = dfList[[i]][, colIdx[k]],
                           y = dfList[[j]][, colIdx[k]])
      # add a title so we can see in the plot what is plotted vs what
      title <- paste0(names(dfList)[i], ":", colNames[k], " VS ",
                      names(dfList)[j], ":", colNames[k])
      # make and save plot
      plotList[[i, j, k]] <- plotFun(tempDf, title)
    }
  }
}
# call the plots like this
plotList[[2, 1, 4]]
# Note that we only filled the lower triangle of combinations
# therefore indexing with [[1, 1, 1]] just returns NULL
plotList[, , 1]

这个过程或许还可以进一步优化,但在创建图表时,我会优先考虑代码清晰而不是速度,因为速度通常不是问题。