我有一个包含(许多)横断面的数据集,其中记录了地面高程(x 是水平距离,z 是海拔,单位为米)和植被结构。b_ml、b_kl1、b_kl2 和 b_s1 分别是苔藓层、草本层 1、草本层 2 和灌木层的覆盖率(样本数据中以 0–1 的比例表示),h_kl1、h_kl2 和 h_s1 是对应层的高度(单位为厘米;苔藓层没有高度列,始终按 3 厘米计)。我想用美观的图表来呈现这些数据。下面是一个数据样本(1 个横断面):
# Sample data: one transect, as produced by dput(). The result is assigned to
# `df` because every snippet below reads the data from that name.
# Columns: X = horizontal distance, Z = elevation (metres);
# b_* = cover of a vegetation layer, h_* = height of the same layer in cm
# (ml = moss, kl1 / kl2 = herb layers 1 and 2, s1 = shrub layer).
df <- structure(list(X = c(432529.4846, 432530.4562, 432531.3492, 432532.3046,
432533.3252, 432533.3419, 432534.3361, 432535.2709, 432536.3843,
432537.4198, 432538.4336, 432539.3736, 432540.448, 432541.3967,
432542.2748, 432543.0681, 432544.2508, 432545.2269, 432546.0911,
432547.0195, 432548.0396, 432549.0209, 432549.9539, 432550.9391,
432551.8999, 432552.8647, 432553.8258, 432554.7889, 432555.7722,
432556.7435, 432557.7456, 432558.714, 432559.6824, 432560.6456,
432561.6306, 432562.6059, 432563.5442, 432564.5688, 432565.5408,
432566.5742), Z = c(10.6399, 10.5674, 10.4631, 10.3553, 10.2856,
10.2392, 10.1234, 10.0792, 10.037, 9.9068, 9.834, 9.7207, 9.6024,
9.5322, 9.3343, 9.2266, 9.0705, 8.9673, 8.9716, 8.8292, 8.7495,
8.6541, 8.5429, 8.4306, 8.2933, 8.1877, 7.9679, 7.7799, 7.6801,
7.5073, 7.3754, 7.2414, 7.0254, 7.0095, 6.8553, 6.7976, 6.7528,
6.7151, 6.5225, 6.1952), b_ml = c(0.1, 0.05, NA, 0.1, 0.05, NA,
NA, NA, 0.05, NA, 0.1, 0.2, 0.1, 0.05, NA, NA, NA, NA, NA, NA,
NA, 0.01, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), h_kl1 = c(75, 75, 70, 80, 80, 70, NA, 30,
30, 50, 60, 60, 60, 60, 40, 30, 30, 70, 10, 10, 10, 20, 30, 30,
30, 15, 15, 7, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 3, 3), b_kl1 = c(0.9,
0.95, 0.85, 0.95, 0.95, 0.9, NA, 0.4, 0.8, 0.9, 0.9, 0.9, 0.4,
0.05, 0.6, 0.1, 0.05, 0.1, 0.1, 0.05, 0.1, 0.1, 0.98, 1, 1, 0.98,
0.98, 0.9, 0.95, 0.95, 0.9, 0.8, 0.4, 0.95, 0.98, 0.95, 0.5,
0.25, 0.05, 0.01), h_kl2 = c(110, 110, 110, 100, NA, 110, NA,
110, 110, 100, 110, 120, 110, 110, 110, 70, 70, 120, 130, 120,
110, 50, 80, NA, 90, NA, NA, NA, NA, 20, NA, NA, NA, NA, NA,
45, 45, 25, NA, NA), b_kl2 = c(0.05, 0.05, 0.03, 0.02, NA, 0.1,
NA, 0.05, 0.05, 0.05, 0.05, 0.1, 0.05, 0.1, 0.05, 0.95, 0.95,
0.95, 0.95, 0.95, 0.95, 0.95, 0.01, NA, 0.01, NA, NA, NA, NA,
0.05, NA, NA, NA, NA, NA, 0.1, 0.6, 0.1, NA, NA), h_s1 = c(NA,
NA, NA, NA, NA, NA, NA, 300, 250, NA, NA, NA, 270, 270, 250,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), b_s1 = c(NA, NA, NA, NA,
NA, NA, NA, 0.7, 0.6, NA, NA, NA, 0.75, 0.75, 0.75, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA)), .Names = c("X", "Z", "b_ml", "h_kl1",
"b_kl1", "h_kl2", "b_kl2", "h_s1", "b_s1"), class = "data.frame", row.names = 40:79)
我尝试使用geom_line和geom_pointrange绘制它们,使用密度作为alpha,但这看起来并不好看:
# Line width shared by all vegetation layers.
ld <- 2
# Base plot: terrain profile (elevation Z against horizontal distance X).
p <- ggplot(data = df, mapping = aes(x = X, y = Z)) +
  geom_line()
# Overlay one line per vegetation layer, drawn at the layer top and faded by
# its cover. Layer heights are in cm while Z is in metres, hence the "/ 100";
# x is inherited from the base plot.
p +
  geom_line(aes(y = Z + 0.02, alpha = b_ml), size = ld, colour = "darkgreen") +
  geom_line(aes(y = Z + h_kl1 / 100, alpha = b_kl1), size = ld, colour = "green") +
  geom_line(aes(y = Z + h_kl2 / 100, alpha = b_kl2), size = ld, colour = "green") +
  geom_line(aes(y = Z + h_s1 / 100, alpha = b_s1), size = ld, colour = "brown") +
  theme_bw()
# Same layers as stacked vertical ranges: each layer spans from the top of the
# layer below it to its own top, with opacity given by its cover.
# x (and the point's y) are inherited from the base plot `p`.
p +
  geom_pointrange(
    aes(ymin = Z, ymax = Z + 0.03, alpha = b_ml),
    colour = "darkgreen"
  ) +
  geom_pointrange(
    aes(ymin = Z + 0.03, ymax = Z + h_kl1 / 100, alpha = b_kl1),
    colour = "green"
  ) +
  geom_pointrange(
    aes(ymin = Z + h_kl1 / 100, ymax = Z + h_kl2 / 100, alpha = b_kl2),
    colour = "green"
  ) +
  geom_pointrange(
    aes(ymin = Z + h_kl2 / 100, ymax = Z + h_s1 / 100, alpha = b_s1),
    colour = "brown"
  ) +
  theme_bw()
我想用点的疏密来表示各层的密度,制作一个更好看的图。就像这张来自大自然的照片:
基本上,两层之间的区域用点填充,点的数量取决于密度/覆盖率。但我不太清楚该如何实现。我也考虑过 'geom_point' 和 'geom_dotplot',但这需要手工计算点数。
对于如何实现这一目标,有什么建议吗?例如,应该使用哪个 geom(jitter?),或者应如何重构我的数据?
答案 0 :(得分:3)
我认为你并没有足以绘制出示例那种图的实际数据,原因很简单:示例图似乎基于一定数量的真实数据点(逐个观测?某种扫描技术?),而你只有密度估计值。由密度估计值凭空生成点,往好了说会令人困惑,还可能产生误导。如果你有用来计算这些密度的原始数据,那就另当别论了。以下是你可以制作的一些图表示例:
首先,我以整齐的格式重新排列数据,以便更有意义:
library(dplyr)
library(tidyr)
# Cover ("density") per layer in long format: one row per position and layer.
# substring(type, 3) strips the "b_" prefix so layer names match `heights`.
densities <- df %>%
  select(-h_kl1, -h_kl2, -h_s1) %>%
  gather("type", "density", b_ml, b_kl1, b_kl2, b_s1) %>%
  mutate(type = substring(type, 3))
# Layer heights in the same long format ("h_" prefix stripped). There is no
# h_ml column, so the moss layer has no rows here.
heights <- df %>%
  select(-b_ml, -b_kl1, -b_kl2, -b_s1) %>%
  gather("type", "height", h_kl1, h_kl2, h_s1) %>%
  mutate(type = substring(type, 3))
# Join on explicit keys rather than the implicit natural join, which prints a
# "Joining, by = ..." message and would silently change its keys if the two
# tables ever shared another column.
df2 <- left_join(densities, heights, by = c("X", "Z", "type")) %>%
  mutate(
    # Moss has no height column: use a fixed 0.03 (metres); the other layers
    # are converted from cm to m to match Z.
    height = ifelse(type == "ml", 0.03, height / 100),
    type = factor(type, levels = c("s1", "kl2", "kl1", "ml"))
  )
尝试1:
# Attempt 1: terrain profile plus one vertical bar per layer, running from the
# ground (Z) to the layer top (Z + height); opacity encodes cover.
ggplot(data = df2, mapping = aes(x = X, y = Z)) +
  geom_line() +
  geom_linerange(
    mapping = aes(ymin = Z, ymax = Z + height, alpha = density, colour = type),
    size = 4
  ) +
  scale_alpha_continuous(range = c(0, 0.7))
一个问题是 ml(苔藓层)几乎不可见,因为它的尺度与其他层相差很大。我们可以试着把各层并排错开(dodge):
# Same bars, but dodged side by side so the layers no longer cover each other.
ggplot(data = df2, mapping = aes(x = X, y = Z)) +
  geom_line() +
  geom_linerange(
    mapping = aes(ymin = Z, ymax = Z + height, alpha = density, colour = type),
    size = 2,
    position = position_dodge(width = 1)
  )
这没什么用。也许是分面:
# Same bars again, with one panel per vegetation layer.
ggplot(data = df2, mapping = aes(x = X, y = Z)) +
  geom_line() +
  geom_linerange(
    mapping = aes(ymin = Z, ymax = Z + height, alpha = density, colour = type),
    size = 4
  ) +
  facet_wrap(facets = ~type)
最后一个选项:
# Last option: a point at the top of each layer, sized by cover.
ggplot(data = df2, mapping = aes(x = X, y = Z)) +
  geom_line() +
  geom_point(
    mapping = aes(y = Z + height, size = density, colour = type),
    alpha = 0.6
  )
我认为这张图非常清楚地显示了植被的变化趋势。我喜欢它对不同类别给予同等的重视。
加上线段(geom_segment):
# As above, plus a faint vertical segment from the ground up to each point.
ggplot(data = df2, mapping = aes(x = X, y = Z)) +
  geom_line() +
  geom_segment(
    mapping = aes(xend = X, yend = Z + height),
    alpha = 0.1
  ) +
  geom_point(
    mapping = aes(y = Z + height, size = density, colour = type),
    alpha = 0.8
  )
答案 1 :(得分:1)
我自己用 geom_jitter 得到了一个接近的解决方案。为了说明我想做什么(也为了反驳那些完全没有根据的指责,即所谓的“凭空变出数据”和“给观察者灌输错误的想法”[原文如此]),我把我的尝试贴在这里:
#make a tidy data.frame (taken from @Axeman)
library(dplyr)
library(tidyr)
# Cover ("density") per layer in long format: one row per position and layer.
# substring(type, 3) strips the "b_" prefix so layer names match `heights`.
densities <- df %>%
  select(-h_kl1, -h_kl2, -h_s1) %>%
  gather("type", "density", b_ml, b_kl1, b_kl2, b_s1) %>%
  mutate(type = substring(type, 3))
# Layer heights in the same long format ("h_" prefix stripped). There is no
# h_ml column, so the moss layer has no rows here.
heights <- df %>%
  select(-b_ml, -b_kl1, -b_kl2, -b_s1) %>%
  gather("type", "height", h_kl1, h_kl2, h_s1) %>%
  mutate(type = substring(type, 3))
# Join on explicit keys rather than the implicit natural join, which prints a
# "Joining, by = ..." message and would silently change its keys if the two
# tables ever shared another column.
df2 <- left_join(densities, heights, by = c("X", "Z", "type")) %>%
  mutate(
    # Moss has no height column: use a fixed 0.03 (metres); the other layers
    # are converted from cm to m to match Z.
    height = ifelse(type == "ml", 0.03, height / 100),
    type = factor(type, levels = c("s1", "kl2", "kl1", "ml"))
  )
# Repeat each row in proportion to its cover so that denser layers get more
# dots: density * 1000 copies, i.e. 1% cover -> 10 rows, 100% -> 1000 rows.
# round() is needed because rep() truncates fractional counts, so a
# floating-point product that lands just below a whole number would otherwise
# lose one copy.
df2$repli <- round(df2$density * 1000)
# rep() does not accept NA counts: rows without a cover value are kept once.
df2$repli[is.na(df2$repli)] <- 1
# Index by position; the replicate count of each row comes from `repli`.
df3 <- df2[rep(seq_len(nrow(df2)), times = df2$repli), ]
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later.
library(ggplot2)
# Terrain profile as a grey line.
p <- ggplot(df3, aes(x = X, y = Z)) +
  geom_line(size = 1.5, colour = "gray") +
  theme_bw()
# Dot positions are drawn explicitly instead of with position_jitter():
# its `height` is meant to be a single amount, not a per-row vector with NAs,
# and jittering by +/- height around the layer midpoint spreads the dots from
# Z - height / 2 to Z + 1.5 * height, so neighbouring layers overlap.
# Rows lacking cover or height only exist as placeholders and get no dot.
dots <- df3[!is.na(df3$density) & !is.na(df3$height), ]
# Horizontal spread of +/- 1, the same amount the jitter width used.
dots$x_dot <- dots$X + runif(nrow(dots), min = -1, max = 1)
# Vertical position uniformly between the ground (Z) and the layer top.
dots$y_dot <- dots$Z + runif(nrow(dots)) * dots$height
p + # optionally add geom_line(aes(y = Z + height, col = type)) for layer tops
  geom_point(
    data = dots,
    mapping = aes(x = x_dot, y = y_dot, col = type),
    size = 0.0001, alpha = 0.1
  ) +
  # Named values keep each colour tied to its layer even if a layer has no dots.
  scale_color_manual(
    values = c(s1 = "brown", kl2 = "#CCFF00", kl1 = "#33CC00", ml = "#666600")
  )
它仍然不完美:有一些我看不懂的警告信息,各层的抖动点(jitter)之间存在重叠,还有一些较小的问题。但它已经接近我最初的想法:用一种直观的方式表示植被层,同时体现高度和密度。
从审美角度看,Axemans的“最后一个选择”我认为更具吸引力。