如何用R中的3个因子绘制响应变量?

时间:2013-11-08 11:42:00

标签: r plot ggplot2 histogram

我在R中有以下表格:

  ExperimentID clients connections msgSize     Mean Deviation     Error
1             4      50          50      10 73.19379 21.313889 0.2263254
2             5      50          10      10 73.25170 21.457139 0.2265276
3             6      50         100      10 73.20642 21.396485 0.2261432
4             7      50          50    1999 53.75247 11.863616 0.1695395
5             8      50          10    1999 53.88464 12.778998 0.2234775
6             9      50         100    1999 53.99422 11.947930 0.2085102
7            10      10          50    1999 49.74034  9.296995 0.3855425
8            11      10          10    1999 49.77624  8.639379 0.3566724
9            12      10         100    1999 50.30912 10.800443 0.4442306
10           13      10          50      10 68.80108 19.674006 0.5892552
11           14      10          10      10 69.41143 19.671618 0.5845524
12           15      10         100      10 69.09130 19.821473 0.5894541
13           16     100          10    1999 56.32045 16.370877 0.1940681
14           17     100          50    1999 55.93405 14.007772 0.2272496

现在,我想将“Mean”(均值)列绘制为三个因子的函数:clients(客户端数)、connections(连接数)和 msgSize(消息大小)。我认为合理的方法是准备两个直方图,一个用于 msgSize = 10,另一个用于 msgSize = 1999。在这两个直方图的每一个中,x 轴表示客户端数量,y 轴表示均值。对于 x 轴上的每个刻度线(即,对于特定的客户端值),可以有 3 个柱(每个柱分别对应 10、50 或 100 个连接时的均值)。我怎样才能在 R 中实现这一目标?

P.S:我是R的新手,所以一些详细的答案会有所帮助。如果ggplot有更好的答案,那也没关系。

1 个答案:

答案 0 :(得分:2)

# Read the experiment results from the comma-separated file "home".
data <- read.csv(file = "home", sep = ",")

# Keep the table as a data frame named `df` and show it.
df <- data.frame(data)
df

   ExperimentID clients connections msgSize     Mean Deviation     Error
1             4      50          50      10 73.19379 21.313889 0.2263254
2             5      50          10      10 73.25170 21.457139 0.2265276
3             6      50         100      10 73.20642 21.396485 0.2261432
4             7      50          50    1999 53.75247 11.863616 0.1695395
5             8      50          10    1999 53.88464 12.778998 0.2234775
6             9      50         100    1999 53.99422 11.947930 0.2085102
7            10      10          50    1999 49.74034  9.296995 0.3855425
8            11      10          10    1999 49.77624  8.639379 0.3566724
9            12      10         100    1999 50.30912 10.800443 0.4442306
10           13      10          50      10 68.80108 19.674006 0.5892552
11           14      10          10      10 69.41143 19.671618 0.5845524
12           15      10         100      10 69.09130 19.821473 0.5894541
13           16     100          10    1999 56.32045 16.370877 0.1940681
14           17     100          50    1999 55.93405 14.007772 0.2272496

# Lay out the two base-graphics plots (Message Size = 10 and
# Message Size = 1999) side by side: one row, two columns.
par(mfrow = c(1, 2))

# Keep only the rows with msgSize == 10. subset() evaluates the condition
# inside `df`, so the column is referenced by its bare name.
msg_10 <- subset(df, msgSize == 10)
msg_10

   ExperimentID clients connections msgSize     Mean Deviation     Error
1             4      50          50      10 73.19379  21.31389 0.2263254
2             5      50          10      10 73.25170  21.45714 0.2265276
3             6      50         100      10 73.20642  21.39648 0.2261432
10           13      10          50      10 68.80108  19.67401 0.5892552
11           14      10          10      10 69.41143  19.67162 0.5845524
12           15      10         100      10 69.09130  19.82147 0.5894541


# Scatter plot of Mean against clients for Message Size = 10, with one colour
# per distinct connections value.
conn_10 <- factor(msg_10$connections)
plot(msg_10$clients, msg_10$Mean,
     col = as.integer(conn_10), pch = 19,
     xlab = "Clients", ylab = "Mean",
     xlim = c(0, 60), ylim = c(68, 74),
     main = "Message Size = 10", cex.main = 0.85)
# Build the legend from the factor levels so that each label is paired with
# the colour index used for its points, whatever the row order is.
legend("bottomright", legend = levels(conn_10),
       col = seq_along(levels(conn_10)), pch = 19, title = "connections")

# Keep only the rows with msgSize == 1999. subset() evaluates the condition
# inside `df`, so the column is referenced by its bare name.
msg_1999 <- subset(df, msgSize == 1999)
msg_1999

   ExperimentID clients connections msgSize     Mean Deviation     Error
4             7      50          50    1999 53.75247 11.863616 0.1695395
5             8      50          10    1999 53.88464 12.778998 0.2234775
6             9      50         100    1999 53.99422 11.947930 0.2085102
7            10      10          50    1999 49.74034  9.296995 0.3855425
8            11      10          10    1999 49.77624  8.639379 0.3566724
9            12      10         100    1999 50.30912 10.800443 0.4442306
13           16     100          10    1999 56.32045 16.370877 0.1940681
14           17     100          50    1999 55.93405 14.007772 0.2272496


# Scatter plot of Mean against clients for Message Size = 1999, with one
# colour per distinct connections value.
conn_1999 <- factor(msg_1999$connections)
plot(msg_1999$clients, msg_1999$Mean,
     col = as.integer(conn_1999), pch = 19,
     xlab = "Clients", ylab = "Mean",
     xlim = c(0, 100), ylim = c(48, 58),
     main = "Message Size = 1999", cex.main = 0.85)
# Build the legend from the factor levels so that each label is paired with
# the colour index used for its points, whatever the row order is.
legend("bottomright", legend = levels(conn_1999),
       col = seq_along(levels(conn_1999)), pch = 19, title = "connections")


这是输出:

enter image description here

编辑:好的,我之前忽略了问题的标签(ggplot2)。所以这里是另一种选择:

library(ggplot2)

# Message Size = 10: Mean versus clients, points coloured by connections.
# The point layer inherits the data and the x/y mapping from ggplot().
ggplot(msg_10, aes(x = clients, y = Mean)) +
  geom_point(aes(color = as.factor(connections)), size = 5) +
  theme_bw() +
  labs(title = "Message Size = 10", color = "Connections")

# Message Size = 1999: Mean versus clients, points coloured by connections.
# The point layer inherits the data and the x/y mapping from ggplot().
ggplot(msg_1999, aes(x = clients, y = Mean)) +
  geom_point(aes(color = as.factor(connections)), size = 5) +
  theme_bw() +
  labs(title = "Message Size = 1999", color = "Connections")

第二张图表(Message Size = 1999)如下所示:

enter image description here