我正在尝试使用ggplot绘制多个图形。
代码如下:
# Point plot of ave_Networth against decile, with error bars spanning
# ave_Networth -/+ sd_Networth, drawn as one panel per AGE_bin.
# scales = "free" lets every panel pick its own x and y axis ranges.
ggplot(df, aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth
    )
  ) +
  facet_wrap(~AGE_bin, scales = "free")
但是,其中一个“十分位”(第10个)的数值远大于其他组,使其他“十分位”在图中被压缩得难以分辨。如何对facet_wrap的每个面板进行标准化,使“十分位数10”能与其他“十分位数”更好地一起显示?
数据:
# Reproducible example data (dput output): 100 rows = 10 age bins x 10 deciles,
# with columns AGE_bin, decile, ave_Networth and sd_Networth.
# The object is a grouped tibble (class "grouped_df") grouped by AGE_bin,
# each group holding 10 consecutive rows.
df <- structure(list(AGE_bin = c("bin_18_24", "bin_18_24", "bin_18_24",
"bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24",
"bin_18_24", "bin_18_24", "bin_25_29", "bin_25_29", "bin_25_29",
"bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29",
"bin_25_29", "bin_25_29", "bin_30_34", "bin_30_34", "bin_30_34",
"bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34",
"bin_30_34", "bin_30_34", "bin_35_39", "bin_35_39", "bin_35_39",
"bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39",
"bin_35_39", "bin_35_39", "bin_40_44", "bin_40_44", "bin_40_44",
"bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44",
"bin_40_44", "bin_40_44", "bin_45_49", "bin_45_49", "bin_45_49",
"bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49",
"bin_45_49", "bin_45_49", "bin_50_54", "bin_50_54", "bin_50_54",
"bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54",
"bin_50_54", "bin_50_54", "bin_55_59", "bin_55_59", "bin_55_59",
"bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59",
"bin_55_59", "bin_55_59", "bin_60_64", "bin_60_64", "bin_60_64",
"bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64",
"bin_60_64", "bin_60_64", "bin_65_90", "bin_65_90", "bin_65_90",
"bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90",
"bin_65_90", "bin_65_90"), decile = c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L), ave_Networth = c(-42799.9514563107, -13326.7549019608,
-3804.56310679612, 182.656862745098, 2367.56310679612, 5490.49019607843,
10219.3786407767, 15573.6666666667, 32942.0873786408, 1215195,
-88543.938547486, -24661.6629213483, -6073.77653631285, 1068.8595505618,
4448.57541899441, 9035.94943820225, 18357.6983240223, 38919.7191011236,
75015.1117318436, 360729.747191011, -83359.7058823529, -10225.6590909091,
645.52036199095, 6519.20454545455, 16950.520361991, 32910.3090909091,
59947, 99614.9363636364, 193918.529411765, 4559636.43636364,
-43682.3646341463, -2316.69918699187, 3812.26016260163, 14740.2073170732,
31149.2520325203, 61549.8536585366, 111223.390243902, 220390.162601626,
431319.044715447, 39707858.5813008, -47304.6305970149, 908.816479400749,
9145.00746268657, 31857.7191011236, 76431.6940298507, 136961.548689139,
239810.029850746, 409516.632958802, 799214.123134328, 23437733.3146067,
-23534.0347222222, 5107.85069444444, 19872.9652777778, 62279.3229166667,
141796.600694444, 254556.736111111, 481769.951388889, 928658.868055556,
2675194.20138889, 43276345.59375, -18186.3404255319, 7353.51671732523,
40047.4984802432, 106741.513677812, 201771.617021277, 349972.689969605,
632600.917933131, 1395636.24620061, 7467362.75075988, 100107189.018237,
-12872.2715053763, 12844.7601078167, 64099.5698924731, 162562.291105121,
324875.215053763, 672549.58490566, 1464271.26344086, 3975236.26684636,
15764846.3172043, 133524703.185984, -2200.25284090909, 29720.5982905983,
108347.6875, 241293.168091168, 464230.673295455, 805069.43019943,
1655039.94318182, 4463594.18803419, 15625284.53125, 129427645.128205,
-12766.4538361508, 46987.2743823147, 140018.637191157, 279912.055006502,
506132.146423927, 860558.538361508, 1646383.63328999, 4331098.02275683,
20196055.0429129, 222960808.747984), sd_Networth = c(19952.4205187352,
4009.59002234056, 1755.86187802571, 354.750993275092, 875.657161288449,
983.36347182754, 1154.42818471179, 2673.13307234081, 9814.53960254566,
3229068.69348881, 80763.4718472531, 6641.16873426075, 3948.88255496786,
823.703039739828, 1163.67857190816, 1530.58811957344, 4659.8855411689,
7753.20657773506, 14988.8062067764, 391183.089014281, 79403.7440792276,
6771.38822139849, 1076.50097149467, 2085.51169306169, 3538.12985729517,
6773.87617091665, 8136.08653692693, 15575.370906716, 43499.5408140372,
16251487.9059923, 35314.6097298394, 2791.20649192616, 2213.05829515479,
3292.91147796933, 6575.35932388955, 11179.6713837163, 22475.5169477255,
39758.4468521584, 119334.223663411, 174371376.396929, 49949.5306903766,
1799.05664503879, 4185.55767385215, 9485.99298648255, 15351.1273951797,
23735.7079084032, 38061.3561426131, 60915.0623003272, 222652.345949324,
94889492.5724926, 40634.4838428703, 3486.55103511871, 6022.01536051466,
18357.8033065045, 30008.4145616776, 43065.4085235003, 91012.8666376759,
203097.385703473, 1053542.62119673, 58091928.9133239, 31388.6889295018,
5191.21573011365, 14192.8835953361, 22709.198055496, 33034.8868226208,
54945.0489348437, 119298.977766417, 450266.641660294, 4096090.77500322,
156293273.663792, 33679.4592685038, 7624.74535501237, 24662.3647632881,
29814.2874815741, 66391.9192226496, 123491.617620793, 406935.703862311,
1212704.00461397, 7023794.80821185, 141166857.287318, 16746.6945744379,
14991.3779599531, 26718.8686094867, 49599.5165232508, 69555.903370777,
142279.335735688, 350387.632009764, 1529856.10479949, 6307011.85646724,
166820992.513686, 165289.391214998, 21461.3316797954, 33730.6952915096,
51158.8410213337, 78696.8069684297, 138373.125085833, 394345.528508884,
1597491.31445124, 10026567.8512041, 269598766.17565)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
AGE_bin = c("bin_18_24", "bin_25_29", "bin_30_34", "bin_35_39",
"bin_40_44", "bin_45_49", "bin_50_54", "bin_55_59", "bin_60_64",
"bin_65_90"), .rows = list(1:10, 11:20, 21:30, 31:40, 41:50,
51:60, 61:70, 71:80, 81:90, 91:100)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
答案 0 :(得分:2)
已编辑:补充了pseudo_log变换,它比log_10更适合表示负数。
也许使用对数转换会有所帮助?
# Same faceted plot on a log10 y-axis with comma-formatted labels, laid out
# in two rows of panels.
# Caveat: log10 is undefined for values <= 0, so points and error-bar ends
# at or below zero cannot be drawn on this scale.
ggplot(df, aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  scale_y_log10(labels = scales::comma) +
  facet_wrap(~AGE_bin, scales = "free", nrow = 2)
编辑:标准对数变换的问题在于它无法表示负数,因此既会丢掉一些数值为负的低位十分位数,也会略去十分位数10中可能出现的很大的负值,从两个方面造成视觉失真。
因此,这里可能更合适的另一种相关方法是scales::pseudo_log_trans
,它把有符号的对数变换(可以表示负数)与零附近的线性变换结合在一起。通过调整第一个参数“sigma”,我们可以改变按线性处理与按对数处理的区间范围。在这种情况下,sigma约为1000时似乎最能充分利用此数据集的绘图空间,但这是非常主观的。伪对数变换以一定的非均匀空间失真为代价,可以在精度与覆盖范围之间取得很好的平衡。
# Faceted plot on a pseudo-log y-axis (sigma = 1000), a transform that also
# accepts negative values.
# y breaks sit at -10^8 ... -10^3 and 10^3 ... 10^8; x breaks at each decile.
# scales = "free_y" frees only the y-axis per panel; the x-axis is shared.
ggplot(df, aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  scale_x_continuous(breaks = 1:10, minor_breaks = NULL) +
  scale_y_continuous(
    trans = scales::pseudo_log_trans(sigma = 1000),
    labels = scales::comma,
    breaks = c(-10^(8:3), 10^(3:8)),
    minor_breaks = NULL
  ) +
  facet_wrap(~AGE_bin, scales = "free_y", nrow = 2)