我正在尝试使用下面的函数绘制缺失值。我收到此错误消息:
1101b
101b
有什么想法吗?
答案 0 :(得分:3)
您的函数没有为 ggplot 指定 `x` 和 `y` 这两个美学映射。我将其修改如下:
#' Plot the missing-value pattern of a data set as a raster map
#'
#' Every cell of `data` becomes one tile: the rows of `data` run along the
#' y axis, its columns along the x axis, and the fill shows whether the cell
#' is `NA`.
#'
#' @param data A data frame to inspect for missing values.
#' @return A ggplot object.
ggplot_missing <- function(data) {
  # is.na() turns the data frame into a logical matrix; melting it yields one
  # row per cell with columns Var1 (row), Var2 (column) and value (TRUE = NA).
  df2 <- reshape2::melt(is.na(data))

  # Attach the labels to the values themselves. Passing `labels` to the scale
  # matches them by position, which mislabels the legend whenever only one of
  # the two states (all present / all missing) occurs in the data.
  df2$value <- factor(
    df2$value,
    levels = c(FALSE, TRUE),
    labels = c("Present", "Missing")
  )

  ggplot2::ggplot(df2, ggplot2::aes(Var2, Var1, fill = value)) +
    ggplot2::geom_raster() +
    # drop = FALSE keeps both levels in the legend, so the grey shades stay
    # the same regardless of which states are actually present.
    ggplot2::scale_fill_grey(name = "", drop = FALSE) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.5)
    ) +
    ggplot2::labs(
      x = "Variables in Dataset",
      y = "Rows / observations"
    )
}
测试数据:
# Demo data: iris with NAs injected into random rows of three measurement
# columns (columns 2, 3 and 4 of iris, addressed here by name).
df <- iris
set.seed(4)  # fixed seed so the same rows are blanked on every run
df[sample.int(nrow(df), size = 20), "Sepal.Width"] <- NA
df[sample.int(nrow(df), size = 30), "Petal.Length"] <- NA
df[sample.int(nrow(df), size = 15), "Petal.Width"] <- NA
ggplot_missing(df)
答案 1 :(得分:0)
这与 OP 的问题略有不同。如果想按另一个(因子)变量的各个水平,可视化每个变量的缺失数据模式,可以使用下面的函数:
#' Plot per-group counts of missing values as a labelled heat map
#'
#' For every combination of a level of `xvar` and a column in `yvars`, counts
#' the `NA` values and draws the count as a tile (fill colour plus text label).
#' ggplot2 and reshape2 are called through `::` and are not attached.
#'
#' @param data A data frame.
#' @param xvar Name (a single string) of the grouping column, shown on the
#'   x axis. It is intended to be a factor variable.
#' @param yvars Columns whose missing values are counted, passed to
#'   `reshape2::melt()` as `measure.vars`. Defaults to every column of `data`
#'   other than `xvar`.
#' @return A ggplot object.
ggplot_missing2 <- function(data, xvar, yvars = setdiff(names(data), xvar)) {
  # Fail loudly if a dependency is absent; require() would only return FALSE
  # and let the function die later with an unrelated error.
  for (pkg in c("ggplot2", "reshape2")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required but not installed.", call. = FALSE)
    }
  }
  stopifnot(
    is.character(xvar),
    length(xvar) == 1L,
    xvar %in% names(data)
  )

  var_col <- "variable"
  val_col <- "value"

  # Long format: one row per (observation, measured variable).
  long <- reshape2::melt(
    data,
    id.vars = xvar,
    measure.vars = yvars,
    variable.name = var_col,
    value.name = val_col
  )

  # Count the NAs for each variable within each level of xvar ...
  counts <- reshape2::dcast(
    long,
    formula = stats::as.formula(paste0(var_col, "~", xvar)),
    fun.aggregate = function(x) sum(is.na(x)),
    value.var = val_col
  )

  # ... and go back to long format so each count becomes one tile.
  counts <- reshape2::melt(
    counts,
    id.vars = var_col,
    variable.name = xvar,
    value.name = val_col
  )

  # The .data pronoun replaces the deprecated aes_string(); axis titles are
  # set explicitly so they show the plain column names.
  ggplot2::ggplot(
    counts,
    ggplot2::aes(x = .data[[xvar]], y = .data[[var_col]])
  ) +
    ggplot2::geom_tile(ggplot2::aes(fill = .data[[val_col]]), color = "white") +
    ggplot2::geom_text(ggplot2::aes(label = .data[[val_col]])) +
    ggplot2::scale_fill_gradient(
      "Missing (N)",
      low = "gray",
      high = "cornflowerblue"
    ) +
    ggplot2::labs(title = "Missing Data Pattern", x = xvar, y = var_col)
}
测试数据:
# Demo data: iris with NAs injected into random rows of columns 2, 3 and 4.
df <- iris
set.seed(4)  # fixed seed so the same rows are blanked on every run
df[sample(nrow(df), 20), 2] <- NA
df[sample(nrow(df), 30), 3] <- NA
df[sample(nrow(df), 15), 4] <- NA
# xvar (the grouping factor) must be supplied; yvars are the columns whose
# NAs are counted. Here: the four measurement columns within each Species.
ggplot_missing2(df, xvar = "Species", yvars = names(df)[1:4])