我正在尝试使用 ggalluvial 来跟踪学生在各个学期中的学业路径,并了解学生如何随着时间的推移更换课程(curriculum)。
这是我的数据集示例:
# Sample data in long format (dput() output): 80 rows, one row per
# student ('id') per term ('curr'), with 10 students and 8 terms.
# Note: the result is not assigned to a name here.
structure(list(id = c("1", "2", "6", "8", "9", "10", "11", "12",
"14", "15", "1", "2", "6", "8", "9", "10", "11", "12", "14",
"15", "1", "2", "6", "8", "9", "10", "11", "12", "14", "15",
"1", "2", "6", "8", "9", "10", "11", "12", "14", "15", "1", "2",
"6", "8", "9", "10", "11", "12", "14", "15", "1", "2", "6", "8",
"9", "10", "11", "12", "14", "15", "1", "2", "6", "8", "9", "10",
"11", "12", "14", "15", "1", "2", "6", "8", "9", "10", "11",
"12", "14", "15"),
# 'curr': term label, "CURR1" through "CURR15" (odd numbers only)
curr = c("CURR1", "CURR1", "CURR1", "CURR1",
"CURR1", "CURR1", "CURR1", "CURR1", "CURR1", "CURR1", "CURR3",
"CURR3", "CURR3", "CURR3", "CURR3", "CURR3", "CURR3", "CURR3",
"CURR3", "CURR3", "CURR5", "CURR5", "CURR5", "CURR5", "CURR5",
"CURR5", "CURR5", "CURR5", "CURR5", "CURR5", "CURR7", "CURR7",
"CURR7", "CURR7", "CURR7", "CURR7", "CURR7", "CURR7", "CURR7",
"CURR7", "CURR9", "CURR9", "CURR9", "CURR9", "CURR9", "CURR9",
"CURR9", "CURR9", "CURR9", "CURR9", "CURR11", "CURR11", "CURR11",
"CURR11", "CURR11", "CURR11", "CURR11", "CURR11", "CURR11", "CURR11",
"CURR13", "CURR13", "CURR13", "CURR13", "CURR13", "CURR13", "CURR13",
"CURR13", "CURR13", "CURR13", "CURR15", "CURR15", "CURR15", "CURR15",
"CURR15", "CURR15", "CURR15", "CURR15", "CURR15", "CURR15"),
# 'value': the student's code for that term (e.g. "ISDS", "GBUS");
# NA marks terms with no recorded value
value = c("ISDS", "ISDS", "GBUS", "ISDS", "GBUS", "ISDS",
"ACCT", "GBUS", "ITF", "MGT", "ISDS", "ISDS", "GBUS", "ISDS",
"MKT", "ISDS", "ACCT", "GBUS", "ITF", "MGT", "ISDS", "ISDS",
"ISDS", "ISDS", "MKT", "ISDS", "ACCT", "GBUS", "ISDS", "MGT",
"ISDS", "ISDS", "ISDS", "ISDS", "ISDS", "ISDS", "ACCT", "GBUS",
"ISDS", "ISDS", "ISDS", "ISDS", "ISDS", NA, "ISDS", "ISDS",
"ACCT", "ISDS", "ISDS", "ISDS", "ISDS", "ISDS", "ISDS", "ISDS",
"ISDS", "ISDS", "ISDS", "ISDS", "ISDS", "ISDS", "ISDS", NA,
"ISDS", "ISDS", "ISDS", NA, "ISDS", "ISDS", "ISDS", "ISDS",
"ISDS", NA, "ISDS", "ISDS", "ISDS", NA, "ISDS", "ISDS", "ISDS",
NA)), class = "data.frame", row.names = c(NA, -80L), .Names = c("id",
"curr", "value"))
我想映射:
答案 0 :(得分:2)
抱歉回复晚了。我刚刚合并了一个实验分支,其中包含一个单独的 geom,用于绘制相邻轴之间的“流”(flow),而不是贯穿整个图表的完整“alluvia”,此外还增加了一些新参数。假设已将原问题(OP)中 structure() 调用的结果赋值给 ff2,那么使用以下代码即可绘制出您所描述的图。
# Draw an alluvial diagram of each student's 'value' across the terms in
# 'curr'. Expects 'ff2' to be the long-format data frame with columns
# 'id', 'curr' and 'value'.
library(ggplot2)
library(ggalluvial)

# keep the values of 'curr' in their proper order: unique() preserves order
# of first appearance, whereas default factor levels would be sorted as text
ff2$curr <- factor(ff2$curr, levels = unique(ff2$curr))

ggplot(ff2, aes(
  # position aesthetics:
  # 'x' as in 'geom_bar()'
  # 'stratum' and 'alluvium' specific to ggalluvial
  x = curr, stratum = value, alluvium = id,
  # apply 'fill' colors to both flows and strata
  fill = value
)) +
  # flow parameters:
  # 'lode.guidance' says how to arrange splines in each stratum
  # 'aes.flow' says which axis determines flow aesthetics
  # NOTE(review): newer ggalluvial releases appear to name this guidance
  # option "frontback" -- confirm against the installed version
  geom_flow(lode.guidance = "rightleft", aes.flow = "forward") +
  geom_stratum() +
  # include text labels at each stratum; the 'label' aesthetic is mapped
  # explicitly to the computed stratum because geom_text() requires it
  geom_text(stat = "stratum", aes(label = after_stat(stratum)))
感谢您指出这一需求,尤其是需要以一致的方式处理 NA!
答案 1 :(得分:0)
试试下面的方法。它并不花哨,但确实可行。您可以使用基础图形(base graphics)功能对它进行整理和美化。
安装以下软件包(如果尚未安装),然后加载它们:
library(alluvial)
library(tidyr)
编辑您的数据:
# Reshape 'ff2' (long: one row per id/term) into 'ff3' (wide: one row per
# id, one column per term) for alluvial().

# Replace NAs with a category so they're not lost
ff2$value[is.na(ff2$value)] <- "None"

# Change the term labels to numeric for easy & correct ordering by dropping
# the first four characters (the "CURR" prefix). Converting to character
# first keeps this working when 'curr' is a factor, since nchar() does not
# accept factors.
term_label <- as.character(ff2$curr)
ff2$curr <- as.numeric(substr(term_label, 5, nchar(term_label)))

# Spread the data from long to wide format; 'names_sort' orders the term
# columns numerically and 'values_fill' fills id/term pairs with no row.
# as.data.frame() keeps 'ff3' a plain data frame.
ff3 <- as.data.frame(pivot_wider(
  ff2,
  names_from = curr,
  values_from = value,
  names_sort = TRUE,
  values_fill = "None"
))
按学生为您的图表着色,以便更轻松地跟踪:
# Pick one randomly chosen, distinct named colour per row of 'ff3'
# (one row per student) so each student's ribbon can be followed.
cl <- colors(distinct = TRUE)
color_palette <- cl[sample.int(length(cl), size = nrow(ff3))]
绘图:
# Draw the alluvial plot from columns 2 to 9 of 'ff3' (the eight term
# columns; column 1 is 'id'), coloured with one entry of 'color_palette'
# per row.
alluvial(
  ff3[, 2:9],
  freq = 8, # a single constant weight is supplied for every row
  col = color_palette,
  blocks = TRUE,
  xw = 0.2, # makes the ribbons a bit wavier
  axis_labels = paste0("Term", 1:8) # "Term1" ... "Term8"
)