R:堆叠的geom_area图显示空白多边形

时间:2019-05-17 13:50:35

标签: r ggplot2

关于将geom_bar转换为geom_area图,我需要一些帮助。这是我的df:

dput(df)
df <- structure(list(new_day = c(-25L, 3L, 7L, -7L, 3L, 7L, -7L, 0L,
-25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L,
0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, 3L, 7L, -7L,
0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L,
-25L, 3L, 7L, -7L, 0L, 3L, -7L, 0L, -25L, 7L, 3L, 7L, -7L, 0L,
-25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, 3L, 7L, -7L, 0L, -25L, 3L,
7L, -7L, 0L, 7L, -25L, 3L, 7L, -7L, 0L, 3L, 7L, -25L, -25L, -25L,
-25L, -25L, -25L, -25L), order = structure(c(8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 11L, 11L, 11L, 11L, 11L, 13L, 13L, 13L, 13L,
13L, 10L, 10L, 10L, 10L, 10L, 7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L,
2L, 2L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 1L,
9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 13L, 13L, 14L, 14L,
14L, 14L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 13L, 13L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 1L, 7L, 5L, 2L,
12L, 2L, 2L), .Label = c("Alteromonadales", "Betaproteobacteriales",
"Caulobacterales", "Chitinophagales", "Flavobacteriales", "Parvibaculales",
"Pseudomonadales", "Rhizobiales", "Rhodobacterales", "Rhodospirillales",
"Sneathiellales", "Sphingobacteriales", "Sphingomonadales", "Thalassobaculales"
), class = "factor"), family = structure(c(13L, 13L, 13L, 13L,
12L, 12L, 12L, 12L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 7L, 7L, 7L, 7L, 7L, 11L, 11L, 11L, 11L, 11L, 1L, 1L, 1L,
1L, 1L, 11L, 11L, 11L, 11L, 14L, 14L, 14L, 14L, 14L, 4L, 4L,
4L, 4L, 4L, 14L, 14L, 14L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 16L,
16L, 17L, 17L, 17L, 17L, 8L, 8L, 8L, 8L, 8L, 5L, 5L, 5L, 16L,
16L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L, 8L,
8L, 10L, 10L, 6L, 11L, 3L, 1L, 9L, 1L, 1L), .Label = c("Burkholderiaceae",
"Chitinophagaceae", "Flavobacteriaceae", "Gallaecimonadaceae",
"Hyphomonadaceae", "Idiomarinaceae", "Magnetospiraceae", "Methylophilaceae",
"NS11-12_marine_group", "Parvibaculaceae", "Pseudomonadaceae",
"Rhizobiaceae", "Rhizobiales_unclassified", "Rhodobacteraceae",
"Sneathiellaceae", "Sphingomonadaceae", "Thalassobaculaceae"), class = "factor"),
    genus = structure(c(16L, 16L, 16L, 16L, 7L, 7L, 7L, 7L, 3L,
    3L, 3L, 3L, 3L, 19L, 19L, 19L, 19L, 19L, 24L, 24L, 24L, 24L,
    24L, 14L, 14L, 14L, 14L, 14L, 17L, 17L, 17L, 17L, 17L, 14L,
    14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L,
    10L, 10L, 10L, 2L, 2L, 2L, 2L, 2L, 22L, 22L, 22L, 20L, 20L,
    23L, 23L, 23L, 23L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L,
    21L, 21L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 11L, 11L,
    11L, 11L, 11L, 13L, 13L, 9L, 14L, 4L, 6L, 12L, 1L, 18L), .Label = c("Burkholderiaceae_unclassified",
    "Cupriavidus", "Ferrovibrio", "Flavobacteriaceae_unclassified",
    "Gallaecimonas", "GKS98_freshwater_group", "Hoeflea", "Hyphomonas",
    "Idiomarina", "Marivivens", "Methylotenera", "NS11-12_marine_group_ge",
    "Parvibaculum", "Pseudomonas", "Pseudorhodobacter", "Rhizobiales_unclassified",
    "Rhodoferax", "RS62_marine_group", "Sphingomonadaceae_unclassified",
    "Sphingopyxis", "Sphingorhabdus", "Terrimonas", "Thalassobaculum",
    "uncultured"), class = "factor"), Abundance = c(0.758296593899054,
    0.728046713738242, 0.421798852637834, 0.185971692147469,
    7.36584152568739, 11.0004160226707, 1.93134577450352, 19.7144376530921,
    46.2350237547082, 25.8715062086956, 22.1549641486618, 34.4112477828867,
    20.4937613394223, 3.73518219692229, 15.9295990367068, 13.8490383262387,
    13.3481723220855, 20.3866145291388, 0.165618346100574, 8.86991024549668,
    8.5330814375361, 6.86819004205197, 5.72129192186814, 1.04512973253723,
    3.77880217461655, 6.47871112880127, 1.12084852451492, 0.903754246093232,
    19.0854333497858, 15.7152146349298, 12.3768753373503, 15.8790763239117,
    10.2875187327705, 2.82159106304821, 4.22393981370602, 8.82452898193968,
    4.8507226701533, 6.19619716749583, 8.28477594908417, 8.05201189383953,
    9.7404731686272, 9.84535225459449, 1.7940554465653, 2.62276259756813,
    2.74008811315788, 0.543937440677315, 0.55325167765205, 0.910457573040239,
    0.451385497886567, 0.655661306732001, 6.59400178917785, 1.92570846362683,
    2.62192443054515, 2.10049053655497, 2.13139299576524, 0.20799245164738,
    0.324291631088576, 0.369492771993701, 1.52162438803598, 0.151864202275619,
    0.420953084533189, 0.391517677365401, 0.29116200940885, 0.232440441774702,
    4.21428798609281, 0.859779996836882, 1.33107018783728, 1.013155122065,
    0.447286602320585, 0.165001492967355, 0.285983094976304,
    0.377758692391269, 0.21556919104275, 0.314057858254493, 0.354649793637887,
    0.338799824269294, 0.218027624939685, 0.914324162324944,
    1.22932824654674, 0.731649603629864, 0.566393265064962, 0.247942012186621,
    1.73171328618728, 0.636597714441988, 0.505393049999761, 0.491318560043637,
    0.990988961717433, 0.195417142399681, 0.210412739808352,
    0.476107780140271, 0.936663899397428, 0.251540964619117,
    0.963667386912928, 0.504905545701818, 0.296220086916766,
    0.240809811677774)), class = "data.frame", row.names = c(52L,
68L, 72L, 93L, 165L, 169L, 190L, 194L, 246L, 262L, 266L, 287L,
291L, 343L, 359L, 363L, 384L, 388L, 440L, 456L, 460L, 481L, 485L,
634L, 650L, 654L, 675L, 679L, 731L, 747L, 751L, 772L, 776L, 844L,
848L, 869L, 873L, 925L, 941L, 945L, 966L, 970L, 1022L, 1038L,
1042L, 1063L, 1067L, 1216L, 1232L, 1236L, 1313L, 1329L, 1333L,
1354L, 1358L, 1426L, 1451L, 1455L, 1507L, 1527L, 1717L, 1721L,
1742L, 1746L, 2186L, 2202L, 2206L, 2227L, 2231L, 2380L, 2396L,
2400L, 3075L, 3079L, 3294L, 3298L, 3350L, 3366L, 3370L, 3391L,
3395L, 3467L, 4223L, 4239L, 4243L, 4264L, 4268L, 4433L, 4437L,
4708L, 4805L, 4902L, 5193L, 5969L, 7909L, 8006L))

这是结构:

> str(df)
'data.frame':   96 obs. of  5 variables:
 $ new_day  : int  -25 3 7 -7 3 7 -7 0 -25 3 ...
 $ order    : Factor w/ 14 levels "Alteromonadales",..: 8 8 8 8 8 8 8 8 11 11 ...
 $ family   : Factor w/ 17 levels "Burkholderiaceae",..: 13 13 13 13 12 12 12 12 15 15 ...
 $ genus    : Factor w/ 24 levels "Burkholderiaceae_unclassified",..: 16 16 16 16 7 7 7 7 3 3 ...
 $ Abundance: num  0.758 0.728 0.422 0.186 7.366 ...

我的数据是一段时间内各物种的相对丰度。我删除了稀有物种,所以总和不再是100%,不过这没关系,每个日期的总和约为98%。但是,图中出现了一些奇怪的、游离的多边形和三角形,看起来像是分组出了问题,可是设置 group 参数并没有带来任何改变。我还尝试了几个 position 和 stat 参数,也没有帮助。也许与因子水平的顺序有关?

我想要的是按目(order)堆叠丰度的堆积面积图,各层之间没有空隙。参考:Create proportional geom_area plot directly in ggplot2

# Order-level stacked area chart: Abundance over new_day, one fill colour per
# taxonomic order, with a dashed vertical reference line at day 0.
# df has several rows for some (new_day, order) pairs, so this is the plot
# that exhibits the stray polygons described in the question.
ggplot(data = df, mapping = aes(fill = order, y = Abundance, x = new_day)) +
  geom_area(position = "stack", stat = "identity") +
  geom_vline(
    mapping = aes(xintercept = 0),
    size = 1.2,
    linetype = "dashed"
  )

plot on order level

换用更细的分类层级(属 genus,而不是目 order)时,奇怪的形状变少了

# Genus-level stacked area chart: Abundance over new_day, one fill colour per
# genus, with a dashed vertical reference line at day 0.
ggplot(data = df, mapping = aes(fill = genus, y = Abundance, x = new_day)) +
  geom_area(position = "stack", stat = "identity") +
  geom_vline(
    mapping = aes(xintercept = 0),
    size = 1.2,
    linetype = "dashed"
  )

plot on genus level

但是仍然存在这些空白区域,而且堆叠后的高度超过了给定时间点应有的丰度总和

# Sum of every Abundance value recorded on day -25.
# which() keeps only the rows where the comparison is TRUE, so rows with a
# missing new_day are left out of the total.
sum(df$Abundance[which(df$new_day == -25)])
[1] 98.03997

关于如何解决此问题的任何建议?

1 个答案:

答案 0 :(得分:2)

问题在于,即使在更细的分类层级上,同一个 new_day 有时也对应多个 abundance 值。

这就是面积图中出现不连续的原因。每个 new_day 只能有一个唯一值。在下面的示例中,我只是按 new_day 和 order 分组后取了第一个丰度值,但这可能并不是您想要展示的内容。(根据您的需要,也可以取均值,或者把这些值分配到它们之间的其他 new_day 点上。)

剩余的小间隙是由于缺少 abundance 值引起的,因为正如您所说,它的总和不是100%。没什么大不了的,但是您可以通过将丢失的值替换为0来解决它。

编辑:现在按照您所说的那样进行丰度值的总和,并通过将缺失值替换为0来消除剩余的小间隙。

library(tidyverse)

# Reduce df to exactly one abundance value per (new_day, order) pair, then add
# every pair that does not occur in the data with an explicit abundance of 0,
# so that each order's area is defined at every day.
data <- df %>%
  # Sum abundance values, to only keep one per point
  group_by(new_day, order) %>%
  summarise(abundance = sum(Abundance)) %>%
  ungroup() %>%
  # Replace missing new_day/order combinations by 0. complete() does this in
  # one step and keeps `order` as a factor, instead of widening with spread(),
  # re-lengthening with gather() and patching the NAs afterwards.
  complete(new_day, order, fill = list(abundance = 0))

# Stacked area chart of the summed abundances, one fill colour per order,
# with a dashed vertical reference line at day 0.
ggplot(data, aes(x = new_day, y = abundance, fill = order)) +
  geom_area(stat = "identity") +
  geom_vline(aes(xintercept = 0), linetype = "dashed", size = 1.2)

enter image description here