根据某些条件从多个数据框中提取子集,并在同一张图上绘制多个数据集

时间:2017-05-17 23:31:49

标签: r dataframe plot subset with-statement

我有几个数据框,每个数据框都包含来自多个来源的数据。我需要能够根据某些条件绘制数据。例如,我希望从每个数据框中取出所有满足 season=="wet" 且 canopy_understory=="c" 的记录,把它们画在同一张图上,并使用 temp_sd 列作为误差线。数据之所以按这种特殊方式组织,是由收集数据的方式决定的,而且这被认为是解析这些数据最合理的方式。

我尝试过像下面这样使用 with 提取数据:

# Extract the mean temperature and the matching hour for the dry-season canopy
# records of the valley and ridge tables.
# which() keeps only the rows where the condition is TRUE. Both tables contain
# rows whose season or canopy_understory is NA; with plain logical indexing
# those rows come back as NA elements in the result.
# The ridge hour call uses the same criteria as the ridge temp_mean call so
# that the two vectors line up element by element.
with(valley_ls, temp_mean[which(season == "dry" & canopy_understory == "c")])
with(valley_ls, hour[which(season == "dry" & canopy_understory == "c")])
with(ridge_ls, temp_mean[which(season == "dry" & canopy_understory == "c")])
with(ridge_ls, hour[which(season == "dry" & canopy_understory == "c")])

这样可以按我想要的条件得到 hour 和 temp_mean,但我还需要能够提取 temp_mean,并以 temp_sd 作为误差线进行绘图。我尝试过把这些子集赋值给新的数据框,但这样做很容易出错,而且十分繁琐。

我使用多个数据帧完成的绘图示例如下:

# Understory temperature over time for three summary tables, each drawn as a
# mean line with error bars spanning means - sd to means + sd:
# ridge in red, valley in the default colour, edge in blue.
# Layers without their own data/aes inherit them from the ggplot() call.
ggplot(ridge_u_w_summary, aes(x = times, y = means)) +
  geom_errorbar(aes(ymin = means - sd, ymax = means + sd), colour = "red") +
  geom_line(colour = "red") +
  geom_line(data = valley_u_w_summary) +
  geom_errorbar(
    data = valley_u_w_summary,
    aes(ymin = means - sd, ymax = means + sd)
  ) +
  geom_line(data = edge_u_w_summary, colour = "blue") +
  geom_errorbar(
    data = edge_u_w_summary,
    aes(ymin = means - sd, ymax = means + sd),
    colour = "blue"
  ) +
  theme_classic() +
  labs(
    x = "Time",
    y = expression("Temperature in understory May to December *C")
  )

这段代码生成了我想要的图形,如下所示: enter image description here

上面的方案可以正常工作,但用于其他数据时难以维持,因为这样很容易需要拆分出大量的数据框。我希望能够根据不同的条件,使用这些相同的数据框生成多个图形。

dump(list="valley_ls",file="dump_valley") 的输出:

    valley_ls <-
structure(list(canopy_understory = c("c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), season = c("dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
NA, "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", NA, "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", NA), hour = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, NA, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24, NA, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, NA), temp_mean = c(21.9729388942774, 21.9202365308804, 
21.5160349854227, 21.6135348226018, 21.1698250728863, 21.3997371879106, 
21.7561224489796, 23.4584756898817, 25.5064233576642, 27.1829172141919, 
28.0613372093023, 28.7192358366271, 29.0794202898551, 28.9577075098814, 
28.4787572254335, 27.3324538258575, 26.3749638205499, 25.0135883905013, 
24.0727930535456, 23.5505277044855, 22.8426917510854, 22.7254617414248, 
22.49001447178, 22.9443645083933, 23.8475672877847, 23.6520574162679, 
23.6787136294028, 23.4181818181818, 23.5110260336907, 23.3245229007634, 
23.7951145038168, 25.3284351145038, 25.8448854961832, 27.8920801526718, 
28.0074809160305, 28.9004770992366, 28.6725190839695, 28.6689408396947, 
28.1945038167939, 27.1498808956646, 26.8738131699847, 25.4554337464252, 
25.493415007657, 24.6676358436606, 24.7966309341501, 24.2493803622498, 
24.2797856049005, 24.0876998769988, NA, 23.1587893864013, 23.0023631840796, 
22.7586842105263, 22.6404228855721, 22.4468421052632, 22.4090796019901, 
22.6627140974967, 23.479202988792, 24.4139841688654, 25.2576059850374, 
25.9127799736495, 26.2339152119701, 26.4725, 26.4246882793017, 
26.1284403669725, 25.7063511830635, 25.3211764705882, 24.8449004975124, 
24.4806535947712, 24.1813432835821, 23.9767320261438, 23.7926616915423, 
23.5675816993464, 23.7533333333333, 24.1737847222222, 23.9264961636829, 
24.0318639798489, 23.716010230179, 23.8171284634761, 23.5848191543556, 
23.9746231155779, 24.5888436067244, 25.1335427135678, 25.9691288843607, 
26.2968592964824, 26.6237391747326, 26.7585427135678, 26.5575140091696, 
26.4639447236181, 25.9500254971953, 25.8959748427673, 25.2427332993371, 
25.3163727959698, 24.7879591836735, 24.8969773299748, 24.4647448979592, 
24.5764483627204, 24.2546432062561, NA, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 24.3333333333333, 0, 24.25, 0, 24.0166666666667, 
0, 24.1583333333333, 0, 25.4, 0, 26.2333333333333, 0, 27.2109375, 
0, 27.375, 0, 26.2946428571429, 0, 24.9107142857143, 0, 24.7142857142857, 
0, 24.475, 0, NA), temp_sd = c(7.10368938706592, 6.31619559434987, 
6.63273716936668, 5.98207526384219, 6.81785819259387, 6.09008468822777, 
6.42905917510137, 6.12792441805719, 5.89400673610928, 4.93074376846361, 
5.68493137940986, 4.53882099065438, 5.67276330328425, 4.86289494479903, 
5.0064903940218, 4.50620321541371, 5.23548052610922, 5.73166481558604, 
6.9423848917103, 6.25916235030702, 7.02150223339283, 6.63198273255056, 
6.30121694018913, 1.16737424849318, 0.974447902019749, 0.883662726834489, 
1.1694960633107, 1.00119820290862, 0.950686394394519, 0.960508895273812, 
1.02435507152398, 1.9800105189055, 1.77743259093255, 2.28365035950242, 
2.41005581403749, 2.52263637550233, 2.58958905490024, 2.48794401404523, 
2.37928117718197, 1.9660860877379, 1.79705886384754, 1.22734366140094, 
1.39948592913543, 0.874295812773333, 1.97436507929567, 0.831676233519675, 
1.18463655086496, 0.809942069607868, NA, 1.22357269970187, 1.3197350152676, 
1.37832968945031, 1.34012520382106, 1.34142100192936, 1.34726498418677, 
1.31592378988696, 1.28713308691543, 1.42138403898271, 1.70993228486995, 
2.07113789555289, 2.01605834952938, 2.08999601275536, 1.93479577003559, 
1.71785163242327, 1.58992997264445, 1.56194809778642, 1.4394673032447, 
1.37240648943917, 1.29257875472938, 1.31372428816748, 1.29167595764256, 
1.31947555529684, 1.55319421073715, 0.875684015185201, 0.885810900384003, 
0.968966012850972, 0.903583472065802, 0.929241275404384, 0.928693033054035, 
0.885940062806948, 0.812794500845774, 1.05374707833118, 1.20584906712117, 
1.52863738732854, 1.49350509511768, 1.764425507212, 1.4427075343768, 
1.50555166076425, 1.11778354361208, 1.10212019137791, 0.916392369153426, 
0.963107068443978, 0.845676581451727, 0.971090795785566, 0.855977353338137, 
0.980098036879563, 0.959837031977444, NA, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.711303297442365, 0, 0.490990253030983, 0, 0.599230657553818, 
0, 0.512057009474903, 0, 1.95724387122008, 0, 1.75564565521254, 
0, 2.59194214104019, 0, 2.33853586673371, 0, 1.64200213656485, 
0, 0.903290687394742, 0, 0.498624481575437, 0, 0.668487097856047, 
0, NA)), row.names = c(NA, -135L), vars = list(quote(canopy_understory), 
    quote(season)), drop = TRUE, .Names = c("canopy_understory", 
"season", "hour", "temp_mean", "temp_sd"), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

dump(list="ridge_ls",file="dump_ridge")

的输出
ridge_ls <-
structure(list(canopy_understory = c("c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", "u", 
"u", "u"), season = c("dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", NA, "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", "dry", 
"dry", "dry", "dry", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
"wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", "wet", 
NA), hour = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 
24L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, NA, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, NA), .Label = c("[0,100]", "(100,200]", 
"(200,300]", "(300,400]", "(400,500]", "(500,600]", "(600,700]", 
"(700,800]", "(800,900]", "(900,1e+03]", "(1e+03,1.1e+03]", "(1.1e+03,1.2e+03]", 
"(1.2e+03,1.3e+03]", "(1.3e+03,1.4e+03]", "(1.4e+03,1.5e+03]", 
"(1.5e+03,1.6e+03]", "(1.6e+03,1.7e+03]", "(1.7e+03,1.8e+03]", 
"(1.8e+03,1.9e+03]", "(1.9e+03,2e+03]", "(2e+03,2.1e+03]", "(2.1e+03,2.2e+03]", 
"(2.2e+03,2.3e+03]", "(2.3e+03,2.4e+03]"), class = "factor"), 
    temp_mean = c(22.562356127285, 21.8575176589304, 22.2091712707182, 
    21.4594349142281, 21.8741436464088, 21.2627648839556, 22.5725966850829, 
    23.8943491422805, 26.5008830022075, 27.0156407669021, 28.582398239824, 
    28.2173562058527, 29.3534065934066, 28.4145454545455, 28.5924259055982, 
    26.9787878787879, 26.7398685651698, 24.8841253791709, 24.7435307017544, 
    23.5766430738119, 23.8053728070175, 22.9302325581395, 23.238048245614, 
    22.4414285714286, 21.217952694843, 20.0397517730496, 23.4641860465116, 
    19.8450354609929, 23.2626356589147, 19.7900423728814, 24.0725581395349, 
    21.725918079096, 26.9862015503876, 23.7948799435028, 28.9759689922481, 
    24.5474390674673, 29.5715838509317, 24.3418226774991, 28.5812111801242, 
    23.115411806292, 26.9599378881988, 21.66085572843, 25.3357142857143, 
    20.9439179632249, 24.6169517884914, 20.5703218960028, 24.1321928460342, 
    20.0532657657658, NA, 22.7831422791639, 22.5624888093107, 
    22.2762711864407, 22.232139659803, 21.9816550348953, 22.0190689346464, 
    22.0713858424726, 22.9452103849597, 23.977390438247, 25.0091397849462, 
    25.7459163346614, 26.1808968609865, 26.4856007944389, 26.529226618705, 
    26.3300893743793, 25.837376460018, 25.3583580613254, 24.7221422142214, 
    24.2077304261645, 23.820702070207, 23.5147817460317, 23.3325225225225, 
    23.065376984127, 22.8946456692913, 23.837513397642, 23.646668909825, 
    23.6674418604651, 23.4305079044736, 23.4649224806202, 23.2881367292225, 
    23.5646034816248, 24.3536863270777, 24.9369439071567, 25.8866957104558, 
    26.5365570599613, 26.7293565683646, 27.253488372093, 26.7179624664879, 
    27.0480620155039, 26.019235924933, 26.193786407767, 25.1097217566209, 
    25.2966990291262, 24.5485417365069, 24.6792233009709, 24.2033199195171, 
    24.2778210116732, 23.9984624846248, NA), temp_sd = c(2.73648774464832, 
    3.89501902764261, 1.17914861593683, 3.84962929916136, 1.21627739822434, 
    3.843890078235, 1.48694927255381, 4.53475560968754, 2.4692449230815, 
    5.31072172311035, 2.7801646331622, 5.60552770772567, 3.00665642895969, 
    5.37991344736766, 2.54329494228649, 4.9109846279159, 2.01316086028232, 
    4.31184236532998, 1.40152881522735, 3.98898666713388, 1.17969636144613, 
    3.85727479873907, 1.1230885583311, 3.52267091349933, 7.52465558906129, 
    8.51904516252295, 0.89825806034385, 8.44060231674709, 0.894435454152911, 
    8.40140354279228, 0.954983885810659, 9.28188638752829, 2.08714908301206, 
    10.2555288869657, 2.55244904907137, 10.6329140791161, 2.78976411219072, 
    10.5641962864403, 2.62101883698402, 9.99645036090208, 2.06715648209951, 
    9.26879044865175, 1.22008755481441, 8.9245381254764, 0.956910613039714, 
    8.76186042815369, 0.872979293848191, 8.86368969220654, NA, 
    1.07371319492319, 1.09881749779836, 1.21307549226642, 1.2598629477623, 
    1.31286527191395, 1.36683838145348, 1.4137155267231, 1.42622231911717, 
    1.67860763496765, 1.79425260160877, 2.07042827387483, 2.10731458932958, 
    2.25919838602871, 2.13163564157989, 2.00443799155384, 1.78306510492528, 
    1.6813351820614, 1.41841025766038, 1.31504560496238, 1.16441372650497, 
    1.13358486644718, 1.04842014209596, 1.04971819964, 1.03129409017733, 
    0.781095011320959, 0.81578111612132, 0.84436800431466, 0.852579597106629, 
    0.861309791588296, 0.900921426522036, 0.850538183149708, 
    0.86563230808076, 1.13990715262908, 1.29807379664268, 1.71755587993833, 
    1.64101678624236, 1.995102606277, 1.63987330277186, 1.83556454743413, 
    1.35735241712701, 1.49013625660173, 1.02009949230349, 1.07160411808372, 
    0.800509386066778, 0.854705093200672, 0.765802355814258, 
    0.819364989149883, 0.77707386292288, NA)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -98L), vars = list(
    quote(canopy_understory), quote(season)), drop = TRUE, .Names = c("canopy_understory", 
"season", "hour", "temp_mean", "temp_sd"))

1 个答案:

答案 0 :(得分:0)

我最终做的是创建我想要绘制的每个条件的子集。这并不像我想的那么优雅,但确实有效。

# One data frame per site (valley, ridge, edge), season ("dry"/"wet") and
# stratum (canopy_understory "c" = canopy, "u" = understory).
# subset() drops rows where the condition is NA, so records with a missing
# season or canopy_understory are excluded.
# Fix: the ridge and edge *_wet_* subsets previously filtered on
# season == "dry", so they held dry-season data under a "wet" name.
valley_dry_canopy_subset <-
  subset(valley_ls, season == "dry" & canopy_understory == "c")
valley_wet_canopy_subset <-
  subset(valley_ls, season == "wet" & canopy_understory == "c")
valley_dry_understory_subset <-
  subset(valley_ls, season == "dry" & canopy_understory == "u")
valley_wet_understory_subset <-
  subset(valley_ls, season == "wet" & canopy_understory == "u")

ridge_dry_canopy_subset <-
  subset(ridge_ls, season == "dry" & canopy_understory == "c")
ridge_wet_canopy_subset <-
  subset(ridge_ls, season == "wet" & canopy_understory == "c")
ridge_dry_understory_subset <-
  subset(ridge_ls, season == "dry" & canopy_understory == "u")
ridge_wet_understory_subset <-
  subset(ridge_ls, season == "wet" & canopy_understory == "u")

edge_dry_canopy_subset <-
  subset(edge_ls, season == "dry" & canopy_understory == "c")
edge_wet_canopy_subset <-
  subset(edge_ls, season == "wet" & canopy_understory == "c")
edge_dry_understory_subset <-
  subset(edge_ls, season == "dry" & canopy_understory == "u")
edge_wet_understory_subset <-
  subset(edge_ls, season == "wet" & canopy_understory == "u")

其中一张图可以这样绘制:

# Overlay the *_wet_canopy_subset data of the three sites on one plot:
# valley in black, ridge in red, edge in blue. Each site gets a mean line and
# error bars spanning temp_mean - temp_sd to temp_mean + temp_sd.
#
# Fixes relative to the previous version:
# - the second geom_line() had no data argument, so it redrew the valley line
#   in black and the ridge line was never drawn;
# - the edge site had error bars but no line;
# - only the valley data was passed through na.omit().
#
# hour is wrapped in as.numeric() because valley_ls stores it as a number while
# ridge_ls stores it as a 24-level factor; mixing the two on one x axis fails
# with "Discrete value supplied to continuous scale".
# NOTE(review): this assumes factor codes 1..24 correspond to hours 1..24, and
# that edge_ls$hour is numeric or a factor of the same form — confirm.
ggplot(
  data = na.omit(valley_wet_canopy_subset),
  aes(x = as.numeric(hour), y = temp_mean)
) +
  geom_errorbar(
    aes(ymin = temp_mean - temp_sd, ymax = temp_mean + temp_sd),
    colour = "black"
  ) +
  geom_line(colour = "black", size = 1) +
  geom_errorbar(
    data = na.omit(ridge_wet_canopy_subset),
    aes(ymin = temp_mean - temp_sd, ymax = temp_mean + temp_sd),
    colour = "red"
  ) +
  geom_line(data = na.omit(ridge_wet_canopy_subset), colour = "red", size = 1) +
  geom_errorbar(
    data = na.omit(edge_wet_canopy_subset),
    aes(ymin = temp_mean - temp_sd, ymax = temp_mean + temp_sd),
    colour = "blue"
  ) +
  geom_line(data = na.omit(edge_wet_canopy_subset), colour = "blue", size = 1) +
  theme_classic() +
  labs(x = "Time", y = "Temperature in canopy May to December")