我在R中有以下表格:
ExperimentID clients connections msgSize Mean Deviation Error
1 4 50 50 10 73.19379 21.313889 0.2263254
2 5 50 10 10 73.25170 21.457139 0.2265276
3 6 50 100 10 73.20642 21.396485 0.2261432
4 7 50 50 1999 53.75247 11.863616 0.1695395
5 8 50 10 1999 53.88464 12.778998 0.2234775
6 9 50 100 1999 53.99422 11.947930 0.2085102
7 10 10 50 1999 49.74034 9.296995 0.3855425
8 11 10 10 1999 49.77624 8.639379 0.3566724
9 12 10 100 1999 50.30912 10.800443 0.4442306
10 13 10 50 10 68.80108 19.674006 0.5892552
11 14 10 10 10 69.41143 19.671618 0.5845524
12 15 10 100 10 69.09130 19.821473 0.5894541
13 16 100 10 1999 56.32045 16.370877 0.1940681
14 17 100 50 1999 55.93405 14.007772 0.2272496
现在,我想把“Mean”(均值)列绘制成三个因素(clients、connections 和 msgSize)的函数。我认为比较合理的做法是准备两张柱状图,一张对应 msgSize = 10,另一张对应 msgSize = 1999。在每张图中,x 轴表示客户端数量(clients),y 轴表示均值(Mean)。对于 x 轴上的每个刻度(即某个特定的 clients 取值),画 3 根柱子,分别对应 connections 为 10、50 和 100 时的均值。我怎样才能在 R 中实现这一目标?
P.S.:我是 R 的新手,所以详细一些的答案会很有帮助。如果用 ggplot 有更好的方案,也完全可以。
答案 0 :(得分:2)
# Load the experiment results from the comma-separated file "home".
data <- read.csv("home", sep = ",")
# Keep a data-frame copy under the name `df`.
df <- data.frame(data)
# Echo the table to the console.
df
ExperimentID clients connections msgSize Mean Deviation Error
1 4 50 50 10 73.19379 21.313889 0.2263254
2 5 50 10 10 73.25170 21.457139 0.2265276
3 6 50 100 10 73.20642 21.396485 0.2261432
4 7 50 50 1999 53.75247 11.863616 0.1695395
5 8 50 10 1999 53.88464 12.778998 0.2234775
6 9 50 100 1999 53.99422 11.947930 0.2085102
7 10 10 50 1999 49.74034 9.296995 0.3855425
8 11 10 10 1999 49.77624 8.639379 0.3566724
9 12 10 100 1999 50.30912 10.800443 0.4442306
10 13 10 50 10 68.80108 19.674006 0.5892552
11 14 10 10 10 69.41143 19.671618 0.5845524
12 15 10 100 10 69.09130 19.821473 0.5894541
13 16 100 10 1999 56.32045 16.370877 0.1940681
14 17 100 50 1999 55.93405 14.007772 0.2272496
# Draw two graphs (Message Size = 10 and Message Size = 1999) side by side.
par(mfrow = c(1, 2))
# Keep only the rows measured with msgSize == 10. The condition is evaluated
# inside `df`, so the column is referenced by its bare name (the previous
# code referenced `data_df`, an object that is never created).
msg_10 <- subset(df, msgSize == 10)
# Echo the subset to the console.
msg_10
ExperimentID clients connections msgSize Mean Deviation Error
1 4 50 50 10 73.19379 21.31389 0.2263254
2 5 50 10 10 73.25170 21.45714 0.2265276
3 6 50 100 10 73.20642 21.39648 0.2261432
10 13 10 50 10 68.80108 19.67401 0.5892552
11 14 10 10 10 69.41143 19.67162 0.5845524
12 15 10 100 10 69.09130 19.82147 0.5894541
# Scatter plot of Mean against clients for the msgSize == 10 subset.
# Points are coloured by connection count: the factor's integer codes select
# the colours from the current palette.
conn_10 <- as.factor(msg_10$connections)
plot(Mean ~ clients, data = msg_10, col = conn_10, pch = 19,
     xlab = "Clients", ylab = "Mean", xlim = c(0, 60), ylim = c(68, 74),
     main = "Message Size = 10", cex.main = 0.85)
# Take labels and colours from the same rows (the first occurrence of each
# connection count) so every legend entry shows the colour actually used for
# it, regardless of how the rows happen to be ordered.
first_10 <- !duplicated(conn_10)
legend("bottomright", legend = msg_10$connections[first_10],
       col = conn_10[first_10], pch = 19, title = "connections")
# Keep only the rows measured with msgSize == 1999. The condition is evaluated
# inside `df`, so the column is referenced by its bare name (the previous
# code referenced `data_df`, an object that is never created).
msg_1999 <- subset(df, msgSize == 1999)
# Echo the subset to the console.
msg_1999
ExperimentID clients connections msgSize Mean Deviation Error
4 7 50 50 1999 53.75247 11.863616 0.1695395
5 8 50 10 1999 53.88464 12.778998 0.2234775
6 9 50 100 1999 53.99422 11.947930 0.2085102
7 10 10 50 1999 49.74034 9.296995 0.3855425
8 11 10 10 1999 49.77624 8.639379 0.3566724
9 12 10 100 1999 50.30912 10.800443 0.4442306
13 16 100 10 1999 56.32045 16.370877 0.1940681
14 17 100 50 1999 55.93405 14.007772 0.2272496
# Scatter plot of Mean against clients for the msgSize == 1999 subset.
# Points are coloured by connection count: the factor's integer codes select
# the colours from the current palette.
conn_1999 <- as.factor(msg_1999$connections)
plot(Mean ~ clients, data = msg_1999, col = conn_1999, pch = 19,
     xlab = "Clients", ylab = "Mean", xlim = c(0, 100), ylim = c(48, 58),
     main = "Message Size = 1999", cex.main = 0.85)
# Take labels and colours from the same rows (the first occurrence of each
# connection count) so every legend entry shows the colour actually used for
# it, regardless of how the rows happen to be ordered.
first_1999 <- !duplicated(conn_1999)
legend("bottomright", legend = msg_1999$connections[first_1999],
       col = conn_1999[first_1999], pch = 19, title = "connections")
这是输出:
编辑:我之前忽略了 ggplot2 标签,所以这里再给出另一种方案:
library(ggplot2)
# Message Size = 10: Mean against clients, one point per experiment,
# coloured by connection count (treated as a discrete factor).
ggplot(msg_10, aes(x = clients, y = Mean)) +
  geom_point(aes(color = factor(connections)), size = 5) +
  theme_bw() +
  labs(title = "Message Size = 10", color = "Connections")
# Message Size = 1999: same layout for the large-message subset.
ggplot(msg_1999, aes(x = clients, y = Mean)) +
  geom_point(aes(color = factor(connections)), size = 5) +
  theme_bw() +
  labs(title = "Message Size = 1999", color = "Connections")
第二张图表(Message Size = 1999)如下所示: