documentFracation <- c(0.164, 0.196, 0.102, 0.166, 0.145, 0.017,
0.144, 0.258, 0.139, 0.019, 0.155, 0.013,
0.001,0.099,0.007)
tsSTDCommoncrawl <- c(19,21,23,30,33,34,
38,52,54,65,90,123,
180,181,1014)
average_dice <- c(0.495, 0.505, 0.495, 0.615, 0.48, 0.385,
0.5, 0.555, 0.4, 0.33, 0.405, 0.33,
0.19, 0.32, 0.145)
std_dice <- c(0.278, 0.213, 0.240, 0.184, 0.175,
0.240, 0.282, 0.261, 0.262, 0.2188,
0.2191, 0.1989, 0.143, 0.1874, 0.086)
data2 <- data.frame(type=crawls, df=documentFracation, ts=tsSTDCommoncrawl,
avgdice=average_dice)
# generate scatter plot chart by crawl type with size of point corresponding to max dice value
p <- ggplot() +
geom_point(data=data2, aes(x=df, y=ts, size=avgdice, fill = std_dice), shape=21)
p <- p + scale_y_continuous(trans = 'log10')
# add labels besides points
p <- p + geom_text(data=data2, aes(x=df, y=ts, label=avgdice), size=2, hjust=0.5,vjust= -2)
#add scaled colr as paired color from brewer
#p <- p + scale_color_manual(values=colors)
# legend title
p <- p + guides(fill =
guide_legend(title = "",
label.position = "right",
#keywidth=0.25,
#keyheight=0.2,
default.unit="inch")) + theme(legend.position="right")
p <- p + xlab("document fraction between commoncrawl and directcrawl") +
ylab("timestamp interval standard deviation in commoncrawl (log10 scale)") +
ggtitle("Average Dice value with document fraction \nand timestamp inverval variation") +
#mar=c(top,right,bottom,left)
theme(plot.margin = unit(c(1,1,1,1),"cm"))+
theme(plot.title = element_text(hjust = 0.5))
print(p)
在图的右侧,为什么颜色代码图例是点而不是条?为什么我有“avgdice”的传说而不是颜色(我使用std_dice来填充颜色)?我希望颜色图例如下所示:
感谢您的帮助!