我有一个带有标准id,variable和value列的融合数据框。 变量有4个级别。
我想用 ggplot 以各个因子水平对应的值来绘制散点图。
举例说明:
# Melted example data: 4 ids crossed with 4 variable levels, random values.
data.frame(
  id = gl(4, 1, labels = paste0("id", 1:4)),
  variable = gl(4, 4, labels = LETTERS[1:4]),
  value = rnorm(16)
)
id variable value
1 id1 A -0.494270766
2 id2 A 0.189400188
3 id3 A -0.550961030
4 id4 A -1.046945450
5 id1 B -0.525552660
6 id2 B -0.293601677
7 id3 B 0.009664513
8 id4 B -0.214687215
9 id1 C 1.253551926
10 id2 C -1.241847326
11 id3 C -0.307036508
12 id4 C -0.228632605
13 id1 D -1.683798512
14 id2 D -0.419295267
15 id3 D -0.154469178
16 id4 D -0.763460558
我想为每一对变量(A 对 B、A 对 C、A 对 D、B 对 C 等)分别生成 ggplot 散点图,并为每张图添加平滑曲线。
干杯, 戴维
答案 0 :(得分:4)
以下是 ggplot2 中plotmatrix
的略微修改版本:
# Packages used by this answer: ggplot2 for the plot itself, reshape2 for
# dcast(). library() stops with an error when a package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(ggplot2)
library(reshape2)

# Example data in long (melted) format: 4 ids crossed with 4 variable levels.
dat <- data.frame(
  id = gl(4, 1, labels = paste0("id", 1:4)),
  variable = gl(4, 4, labels = LETTERS[1:4]),
  value = rnorm(16)
)

# Reshape to wide format: one row per id, one column per variable level.
# value.var is given explicitly so dcast() does not have to guess it.
dat <- dcast(dat, id ~ variable, value.var = "value")
#' Scatterplot matrix with diagonal densities and linear fits
#'
#' Plots every column of `data` against every other column in a facet grid.
#' Off-diagonal panels show the points plus a linear fit; diagonal panels
#' show the density of the corresponding column. ggplot2 must be attached,
#' because the default `mapping` and the plotting calls are unqualified.
#'
#' @param data Wide-format data (coerced with `as.data.frame()`) with at
#'   least two columns; each column is one variable.
#' @param mapping Additional aesthetics created with `aes()`. `x` and `y`
#'   are filled in when they are not supplied.
#' @param colour Colour used for the scatterplot points.
#' @return A ggplot object.
plotmatrix <- function(data, mapping = aes(), colour = "black") {
  # Coerce so that `[[` column extraction behaves the same for tibbles.
  data <- as.data.frame(data)
  var_names <- names(data)
  n_vars <- ncol(data)
  if (n_vars < 2L) {
    stop("`data` must have at least two columns.", call. = FALSE)
  }

  # Every ordered pair of distinct columns becomes one off-diagonal panel.
  grid <- expand.grid(x = seq_len(n_vars), y = seq_len(n_vars))
  grid <- grid[grid$x != grid$y, , drop = FALSE]

  all <- do.call("rbind", lapply(seq_len(nrow(grid)), function(i) {
    xcol <- grid[i, "x"]
    ycol <- grid[i, "y"]
    # xvar carries the name of the column drawn on the y axis and yvar the
    # name of the column drawn on the x axis, so facet_grid(xvar ~ yvar)
    # puts the y variable in rows and the x variable in columns.
    data.frame(
      xvar = var_names[ycol], yvar = var_names[xcol],
      x = data[[xcol]], y = data[[ycol]], data
    )
  }))
  all$xvar <- factor(all$xvar, levels = var_names)
  all$yvar <- factor(all$yvar, levels = var_names)

  # One block of rows per column, placed on the diagonal (xvar == yvar).
  densities <- do.call("rbind", lapply(seq_len(n_vars), function(i) {
    data.frame(xvar = var_names[i], yvar = var_names[i], x = data[[i]])
  }))
  densities$xvar <- factor(densities$xvar, levels = var_names)
  densities$yvar <- factor(densities$yvar, levels = var_names)

  # defaults() lives in plyr; it is namespace-qualified so the function does
  # not depend on plyr being attached. The result of defaults() is a plain
  # list, hence the class is restored afterwards.
  mapping <- plyr::defaults(mapping, aes(x = x, y = y))
  class(mapping) <- "uneval"

  ggplot(all, mapping) +
    facet_grid(xvar ~ yvar, scales = "free") +
    geom_point(colour = colour, na.rm = TRUE) +
    # The scaled density is stretched onto the range of the variable so the
    # curve fits the panel's y scale. NOTE(review): recent ggplot2 releases
    # prefer after_stat(scaled); `..scaled..` is kept for older versions.
    stat_density(
      aes(x = x, y = ..scaled.. * diff(range(x)) + min(x)),
      data = densities, position = "identity",
      colour = "grey20", geom = "line"
    ) +
    geom_smooth(se = FALSE, method = "lm", colour = "blue")
}
# Drop the first (id) column and plot the remaining variables pairwise.
plotmatrix(dat[, -1])
答案 1 :(得分:3)
试试这个,
library(ggplot2)
library(reshape2)
library(plyr)
# Long-format example data: 4 ids crossed with 4 variable levels.
d <- data.frame(
  id = gl(4, 1, labels = paste0("id", 1:4)),
  variable = gl(4, 4, labels = LETTERS[1:4]),
  value = rnorm(16)
)
#' Scatterplot with a smoother for one pair of variable levels
#'
#' Keeps only the rows of `d` whose `variable` is in `pair`, reshapes them to
#' wide format and plots the first level against the second.
#'
#' @param pair Length-two vector naming the `variable` levels to plot;
#'   `pair[1]` goes on the x axis and `pair[2]` on the y axis.
#' @param d Long-format data frame with a `variable` column.
#' @return A ggplot object.
plot_pair <- function(pair = c("A", "B"), d) {
  stopifnot(length(pair) == 2L)
  # reshape2 provides dcast(); cast() belongs to the older reshape package,
  # which this snippet does not load. dcast() expects `subset` to be quoted
  # with plyr's .().
  m <- dcast(d, ... ~ variable, subset = .(variable %in% pair))
  ggplot(m, aes_string(x = pair[1], y = pair[2])) +
    geom_point() +
    geom_smooth()
}
# Write one scatterplot per pair of variable levels into a single PDF.
# dev.off() sits in `finally` so the device is closed even if a plot fails.
pdf("allpairs.pdf")
invisible(tryCatch(
  a_ply(combn(levels(d$variable), 2), 2, plot_pair, d = d, .print = TRUE),
  finally = dev.off()
))
答案 2 :(得分:3)
按照 @Dason 的建议试用 GGally 包,并借用 @baptise 的数据重塑代码……
library(ggplot2)
library(reshape2)
library(plyr)
library(GGally)
#
n <- 100 # number of observations
i <- 4 # number of variables, cannot exceed 26 since letters are used as labels
#
# Create long-format data, following @Davy. `labels` must be passed by name:
# positionally, the third argument of gl() is `length`, not `labels`.
d <- data.frame(
  id = gl(n, 1, labels = paste0("id", seq_len(n))),
  variable = gl(i, n, labels = LETTERS[seq_len(i)]),
  value = rnorm(n * i)
)
#
# Reshape to wide format for plotting (from @baptise): one column per
# variable level that appears in `group`.
group <- unique(d[["variable"]])
m <- dcast(
  d,
  ... ~ variable,
  subset = .(variable %in% group)
)
#
# Draw the scatterplot matrix with the GGally package, as suggested by
# @Dason. Only columns 2 onwards of `m` are plotted.
ggpairs(
  m[, seq(2, ncol(m))],
  lower = list(continuous = "smooth"),
  axisLabels = "show"
)
# done!
结果中,对角线上方各方框里的网格线略显杂乱(不过肯定可以关闭),在真正拿得出手之前还需要再做一些收尾润色。
但 ggplot2 的思路总体上是正确的(如果需要,可以去掉平滑曲线)。GGally 的代码托管在 GitHub 上。
值得一提的是,Romain François 的 R Graph Gallery 中有一些很棒的散点图矩阵示例(含代码),它们都可以在 R 中完成。This one 与上面的结果非常相似。