ggplot标准化每个facet_wrap

时间:2019-07-01 20:29:51

标签: r ggplot2

我正在尝试使用ggplot绘制许多图形。

代码如下:

# Plot ave_Networth against decile with error bars spanning
# ave_Networth -/+ sd_Networth, drawing one panel per AGE_bin.
# scales = "free" lets every panel pick its own x and y ranges.
ggplot(df, aes(decile, ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth
    )
  ) +
  facet_wrap(~AGE_bin, scales = "free")

但是,其中一个十分位的数值远大于其他十分位,导致其余十分位在图中被压缩而失真。如何对每个 facet_wrap 分面进行标准化,使第 10 十分位能与其他十分位更协调地显示在一起?

数据:

# Example data (dput output): 100 rows = 10 age bins x 10 deciles.
# Columns:
#   AGE_bin      - age-bin label, "bin_18_24" ... "bin_65_90" (character)
#   decile       - integer 1..10 within each age bin
#   ave_Networth - numeric value plotted on the y axis
#   sd_Networth  - numeric spread used for the error bars
# The object carries the classes grouped_df / tbl_df / tbl / data.frame and is
# grouped by AGE_bin (see the `groups` attribute at the end).
df <- structure(list(AGE_bin = c("bin_18_24", "bin_18_24", "bin_18_24", 
"bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", 
"bin_18_24", "bin_18_24", "bin_25_29", "bin_25_29", "bin_25_29", 
"bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", 
"bin_25_29", "bin_25_29", "bin_30_34", "bin_30_34", "bin_30_34", 
"bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", 
"bin_30_34", "bin_30_34", "bin_35_39", "bin_35_39", "bin_35_39", 
"bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", 
"bin_35_39", "bin_35_39", "bin_40_44", "bin_40_44", "bin_40_44", 
"bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", 
"bin_40_44", "bin_40_44", "bin_45_49", "bin_45_49", "bin_45_49", 
"bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", 
"bin_45_49", "bin_45_49", "bin_50_54", "bin_50_54", "bin_50_54", 
"bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", 
"bin_50_54", "bin_50_54", "bin_55_59", "bin_55_59", "bin_55_59", 
"bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", 
"bin_55_59", "bin_55_59", "bin_60_64", "bin_60_64", "bin_60_64", 
"bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", 
"bin_60_64", "bin_60_64", "bin_65_90", "bin_65_90", "bin_65_90", 
"bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", 
"bin_65_90", "bin_65_90"), decile = c(1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L), ave_Networth = c(-42799.9514563107, -13326.7549019608, 
-3804.56310679612, 182.656862745098, 2367.56310679612, 5490.49019607843, 
10219.3786407767, 15573.6666666667, 32942.0873786408, 1215195, 
-88543.938547486, -24661.6629213483, -6073.77653631285, 1068.8595505618, 
4448.57541899441, 9035.94943820225, 18357.6983240223, 38919.7191011236, 
75015.1117318436, 360729.747191011, -83359.7058823529, -10225.6590909091, 
645.52036199095, 6519.20454545455, 16950.520361991, 32910.3090909091, 
59947, 99614.9363636364, 193918.529411765, 4559636.43636364, 
-43682.3646341463, -2316.69918699187, 3812.26016260163, 14740.2073170732, 
31149.2520325203, 61549.8536585366, 111223.390243902, 220390.162601626, 
431319.044715447, 39707858.5813008, -47304.6305970149, 908.816479400749, 
9145.00746268657, 31857.7191011236, 76431.6940298507, 136961.548689139, 
239810.029850746, 409516.632958802, 799214.123134328, 23437733.3146067, 
-23534.0347222222, 5107.85069444444, 19872.9652777778, 62279.3229166667, 
141796.600694444, 254556.736111111, 481769.951388889, 928658.868055556, 
2675194.20138889, 43276345.59375, -18186.3404255319, 7353.51671732523, 
40047.4984802432, 106741.513677812, 201771.617021277, 349972.689969605, 
632600.917933131, 1395636.24620061, 7467362.75075988, 100107189.018237, 
-12872.2715053763, 12844.7601078167, 64099.5698924731, 162562.291105121, 
324875.215053763, 672549.58490566, 1464271.26344086, 3975236.26684636, 
15764846.3172043, 133524703.185984, -2200.25284090909, 29720.5982905983, 
108347.6875, 241293.168091168, 464230.673295455, 805069.43019943, 
1655039.94318182, 4463594.18803419, 15625284.53125, 129427645.128205, 
-12766.4538361508, 46987.2743823147, 140018.637191157, 279912.055006502, 
506132.146423927, 860558.538361508, 1646383.63328999, 4331098.02275683, 
20196055.0429129, 222960808.747984), sd_Networth = c(19952.4205187352, 
4009.59002234056, 1755.86187802571, 354.750993275092, 875.657161288449, 
983.36347182754, 1154.42818471179, 2673.13307234081, 9814.53960254566, 
3229068.69348881, 80763.4718472531, 6641.16873426075, 3948.88255496786, 
823.703039739828, 1163.67857190816, 1530.58811957344, 4659.8855411689, 
7753.20657773506, 14988.8062067764, 391183.089014281, 79403.7440792276, 
6771.38822139849, 1076.50097149467, 2085.51169306169, 3538.12985729517, 
6773.87617091665, 8136.08653692693, 15575.370906716, 43499.5408140372, 
16251487.9059923, 35314.6097298394, 2791.20649192616, 2213.05829515479, 
3292.91147796933, 6575.35932388955, 11179.6713837163, 22475.5169477255, 
39758.4468521584, 119334.223663411, 174371376.396929, 49949.5306903766, 
1799.05664503879, 4185.55767385215, 9485.99298648255, 15351.1273951797, 
23735.7079084032, 38061.3561426131, 60915.0623003272, 222652.345949324, 
94889492.5724926, 40634.4838428703, 3486.55103511871, 6022.01536051466, 
18357.8033065045, 30008.4145616776, 43065.4085235003, 91012.8666376759, 
203097.385703473, 1053542.62119673, 58091928.9133239, 31388.6889295018, 
5191.21573011365, 14192.8835953361, 22709.198055496, 33034.8868226208, 
54945.0489348437, 119298.977766417, 450266.641660294, 4096090.77500322, 
156293273.663792, 33679.4592685038, 7624.74535501237, 24662.3647632881, 
29814.2874815741, 66391.9192226496, 123491.617620793, 406935.703862311, 
1212704.00461397, 7023794.80821185, 141166857.287318, 16746.6945744379, 
14991.3779599531, 26718.8686094867, 49599.5165232508, 69555.903370777, 
142279.335735688, 350387.632009764, 1529856.10479949, 6307011.85646724, 
166820992.513686, 165289.391214998, 21461.3316797954, 33730.6952915096, 
51158.8410213337, 78696.8069684297, 138373.125085833, 394345.528508884, 
1597491.31445124, 10026567.8512041, 269598766.17565)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    AGE_bin = c("bin_18_24", "bin_25_29", "bin_30_34", "bin_35_39", 
    "bin_40_44", "bin_45_49", "bin_50_54", "bin_55_59", "bin_60_64", 
    "bin_65_90"), .rows = list(1:10, 11:20, 21:30, 31:40, 41:50, 
        51:60, 61:70, 71:80, 81:90, 91:100)), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))

1 个答案:

答案 0 :(得分:2)

(已编辑:补充了 pseudo_log 变换,它比 log10 变换更适合表示负数。)

也许使用对数转换会有所帮助?

# First attempt: draw the same faceted point + error-bar plot, but with the
# y axis on a log10 scale and comma-formatted tick labels, laid out in two
# rows of panels.
# NOTE(review): log10 is undefined for values <= 0, so negative averages or
# error-bar bounds cannot be placed on this axis.
ggplot(df, aes(decile, ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  scale_y_log10(labels = scales::comma) +
  facet_wrap(~AGE_bin, scales = "free", nrow = 2)

enter image description here


编辑:标准对数变换的问题在于它无法表示负数,因此会丢掉一些取值为负的低位十分位,也会略去第 10 十分位中可能出现的较大负值(例如误差线的下限),从而在两个方面造成视觉失真。

因此,这里可能更合适的另一种相关方法是 scales::pseudo_log_trans:它在远离零的区域使用带符号的对数变换(因此可以表示负数),在接近零的区域则平滑过渡为线性变换。通过调整它的第一个参数 sigma,可以改变线性区域与对数区域各自所占的空间。对于这个数据集,sigma ≈ 1000 似乎最能拉开各点的视觉间距,不过这一选择相当主观。伪对数变换以坐标轴间距不均匀(一定的空间失真)为代价,在精度与覆盖范围之间取得了较好的平衡。

# Second attempt: same faceted point + error-bar plot, but the y axis uses a
# pseudo-log transform (sigma = 1000) so negative values can be shown too.
# Only the y axis is freed per panel; x is shared and labelled 1..10.
ggplot(df, aes(decile, ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  scale_x_continuous(breaks = 1:10, minor_breaks = NULL) +
  scale_y_continuous(
    trans = scales::pseudo_log_trans(sigma = 1000),
    # Breaks at every power of ten from 1e3 to 1e8 on both sides of zero.
    breaks = c(
      -1e8, -1e7, -1e6, -1e5, -1e4, -1e3,
      1e3, 1e4, 1e5, 1e6, 1e7, 1e8
    ),
    minor_breaks = NULL,
    labels = scales::comma
  ) +
  facet_wrap(~AGE_bin, scales = "free_y", nrow = 2)

enter image description here