我有一个记录不同海洋浮游动物密度的数据集。我想用条形图或饼图来展示它,但物种太多,很多标签最终会相互重叠。
我想把 `fraction`(占比)列的累积总和小于 5% 的所有物种合并为一个新的“其他”类别(即一个新的因子水平)。
这是我正在使用的数据框的dput():
structure(list(species = structure(c(1L, 4L, 7L, 8L, 9L, 11L,
15L, 16L, 17L, 18L, 19L, 21L, 23L, 26L, 28L, 35L, 36L, 37L, 39L,
40L, 41L, 43L), .Names = c("", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", ""), .Label = c("Beroe cucumis",
"Beroe cucumis larvae", "Boroecia borealis", "Bradyidus similis",
"C. hyperboreus AF", "C. hyperboreus CIV", "Calanus egg", "Calanus nauplii",
"Calanus spp.", "Chaetognatha spp.", "Cirripedia nauplii", "Conchoecia borealis",
"Cyclopoida", "Echinodermata larvae", "Eukrohnia hamata", "Euphausiacea furcilia",
"Euphausiacea nauplii", "Fish larvae", "Fritillaria borealis",
"Hymenodora glacialis", "Idyrea furcata ", "Krill nauplii", "Medusa",
"Mertensia ovum", "Metridia longa", "Microcalanus spp.", "Microsetella norvegica",
"Oithona similis", "Oithona spp.", "Paraeuchaeta barbata AF",
"Paraeuchaeta barbata CII", "Paraeuchaeta barbata CV", "Paraeuchaeta glacialis",
"Paraeuchaeta spp.", "Parasagitta elegans", "Polychaeta larvae",
"Pseudocalanus spp.", "Scyphozoa larvae", "Thysanoessa inermis",
"Thysanoessa longicaudata", "Thysanoessa raschii", "Triconia borealis",
"Zoea larvae"), class = "factor"), density = c(4, 3, 205, 1431,
197, 1786, 1, 11, 50, 1, 36, 4, 1, 34, 26, 13, 83, 30, 8, 1,
0, 26), location = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Names = c("tmp_location",
"tmp_location", "tmp_location", "tmp_location", "tmp_location",
"tmp_location", "tmp_location", "tmp_location", "tmp_location",
"tmp_location", "tmp_location", "tmp_location", "tmp_location",
"tmp_location", "tmp_location", "tmp_location", "tmp_location",
"tmp_location", "tmp_location", "tmp_location", "tmp_location",
"tmp_location"), .Label = c("Hinlopen", "ICE", "KB3", "Karl Kronedjupet"
), class = "factor"), fraction = c(0.00101240192356365, 0.000759301442672741,
0.0518855985826373, 0.362186788154898, 0.04986079473551, 0.452037458871172,
0.000253100480890914, 0.00278410528980005, 0.0126550240445457,
0.000253100480890914, 0.00911161731207289, 0.00101240192356365,
0.000253100480890914, 0.00860541635029107, 0.00658061250316376,
0.00329030625158188, 0.0210073399139458, 0.00759301442672741,
0.00202480384712731, 0.000253100480890914, 0, 0.00658061250316376
)), .Names = c("species", "density", "location", "fraction"), row.names = c(87L,
90L, 93L, 94L, 95L, 97L, 101L, 102L, 103L, 104L, 105L, 107L,
109L, 112L, 114L, 121L, 122L, 123L, 125L, 126L, 127L, 129L), class = "data.frame")
答案 0 :(得分:0)
您可以使用华夫饼图表示这一点。
# Pool low-abundance zooplankton species into a single "other" row and draw
# the result as a waffle chart.
# Expects `df` to be the data frame from the question, with the columns
# `species`, `density`, `location` and `fraction`.

# Install waffle only when it is missing, rather than reinstalling it on
# every run of the script.
if (!requireNamespace("waffle", quietly = TRUE)) {
  install.packages("waffle")
}
library(waffle)

# Rows whose own fraction is below this cut-off are pooled into "other".
# NOTE: each row is tested individually. For a cumulative criterion, order
# the rows by ascending fraction and test cumsum(fraction) instead.
threshold <- 0.05
is_minor <- df$fraction < threshold

# separate rows with fractions lower than the threshold
others <- df[is_minor, ]
df1 <- df[!is_minor, ]

# get summed values of others
others.fraction <- sum(others$fraction)
others.density <- sum(others$density)

# bind others back into df1 as one extra row
df2 <- rbind(
  df1,
  data.frame(
    species = "other",
    density = others.density,
    location = "KB3",
    fraction = others.fraction
  )
)

# make a named vector (waffle likes this as the input) - plotting densities
# here; species is a factor, so convert it to its labels explicitly
densities <- df2$density
names(densities) <- as.character(df2$species)
densities <- sort(densities, decreasing = TRUE)

# plot - dividing by 10 so the chart isn't too big.
# ggtitle() is namespace-qualified so the title works even when ggplot2 is
# not attached to the search path.
waffle(densities / 10, rows = 10) +
  ggplot2::ggtitle("Something about Zooplankton")
这会生成如下图表:
您可以像ggplot图一样修改此图表 - waffle是ggplot函数的便捷包装。
PS。不要使用饼图!