我试图显示对2个调查问题的回答的密度和相关性。对每个问题的回答是因子1-5,我想制作一个响应组合表的热图。
我的表格如下:
> table(mydata$v47, mydata$v78)
     1  2  3  4  5
  1  2  0  0  0  0
  2  0  2  2  0  0
  3  5  7  8  3  0
  4 12 11 14  7  1
  5  1  1  2  4  1
我应该向 geom_tile 传递什么样的填充(fill)映射,才能按频率区分表格中出现次数较多的回答组合?
此代码
# Only x and y are mapped here; no fill aesthetic is given, so the tiles
# are not coloured by how often each answer pair occurs.
ggplot(data = mydata, aes(x = v47, y = v78)) + geom_tile()
产生此图像
答案 0 :(得分:4)
以下是原 ggfluctuation() 函数的修改版本,它将图块的颜色和大小都映射到频率:
# Inline copy of the response cross-tabulation. The first text row and the
# first column hold the category labels (with a 0 placeholder in the corner),
# so the file is read without a header and the labels are stripped later.
mydata <- read.table(header = FALSE, text = "
0 1 2 3 4 5
1 2 0 0 0 0
2 0 2 2 0 0
3 5 7 8 3 0
4 12 11 14 7 1
5 1 1 2 4 1")
library(ggplot2)
library(magrittr)
# Strip the label row/column from `mydata`, transpose the remaining counts,
# label both dimensions "1".."5", and store the result as a table.
tab <- local({
  counts <- t(as.matrix(mydata[-1, -1]))
  level_labels <- as.character(1:5)
  dimnames(counts) <- list(level_labels, level_labels)
  as.table(counts)
})
#' Fluctuation-style heatmap of a two-way frequency table
#'
#' Draws one tile per cell. The fill colour follows the raw count, while the
#' tile width and height follow the square root of the count relative to the
#' largest count, so tile area is proportional to the count.
#'
#' @param tab Either a two-way `table` or a data frame whose first three
#'   columns are, in order, the x category, the y category and the count.
#'   A `table` is transposed before being flattened, so its column variable
#'   ends up on the x axis and its row variable on the y axis.
#' @return A ggplot object with a single `geom_tile()` layer.
ggfluc <- function(tab) {
  if (is.table(tab)) {
    tab <- as.data.frame(t(tab))
  }
  tab <- as.data.frame(tab)
  if (ncol(tab) < 3L) {
    stop(
      "`tab` must be a two-way table or a data frame with x, y and count ",
      "columns",
      call. = FALSE
    )
  }

  tab <- tab[seq_len(3L)]
  names(tab) <- c("x", "y", "result")
  tab$x <- as.factor(tab$x)
  tab$y <- as.factor(tab$y)

  # Largest observed count, ignoring missing cells. Without this, a single
  # NA would turn every tile size into NA and leave nothing to plot.
  observed <- tab$result[!is.na(tab$result)]
  peak <- if (length(observed) > 0L) max(observed) else 0

  # Square-root scaling makes tile area (width * height) linear in the count.
  # When there is no positive count to scale against, `0 * result` gives
  # zero-sized tiles while still leaving NA for missing counts.
  size <- if (peak > 0) sqrt(tab$result / peak) else 0 * tab$result
  # Cells with a missing count are drawn as full-size tiles.
  size[is.na(size)] <- 1
  tab$freq <- size

  ggplot(
    tab,
    aes(x = x, y = y, height = freq, width = freq, fill = result)
  ) +
    geom_tile(colour = "white")
}
# Build the fluctuation plot from the relabelled contingency table `tab`.
ggfluc(tab)