我遇到了ggplot绘制两个不同数据集的直方图的问题。
想象一下,我有两个数据表:
histClick = {
id length
1 4
2 6
3 3
4 2
5 2
6 2
7 2
8 3
9 12
10 2
11 3
12 3
13 3
14 2
15 2
16 4
}
histTest = {
id length
1 3
2 2
3 3
4 4
5 6
6 2
7 4
8 2
}
我在每个表中添加了另一列:
# Tag every row with the name of the dataset it belongs to, so the two
# tables can still be told apart after they are row-bound together.
histClick[, "cat"] <- "Click"
histTest[, "cat"] <- "Test"
这是问题所在。当我想把这两个表绑定到同一个数据框中时,绑定的顺序似乎很重要,并且会影响输出。ggplot的输出在
total <- rbind(histTest, histClick)
和
total <- rbind(histClick, histTest)
这两种写法之间有所不同:
# Build a factor from the `cat` column; it is used below as the fill grouping.
sessions <- as.factor(total$cat)
# Histogram of `length`, one facet row per `cat` value plus a margin panel.
# NOTE(review): `total$length` and `sessions` are vectors taken from outside
# the `data` argument rather than column names -- presumably this is why the
# output depends on the rbind() order once facet_grid() splits the rows;
# confirm against the ggplot2 aes() documentation.
ggplot(total, aes(total$length, fill = sessions)) +
geom_histogram(binwidth = .2) +
facet_grid(cat ~ ., margins = TRUE, scales = "free")
使用 total <- rbind(histClick, histTest) 时的输出:
使用 total <- rbind(histTest, histClick) 时的输出:
我的问题: 1-为什么仅仅改变rbind的顺序就会得到不同的结果?顺序有关系吗?
2-如何按照我提供数据的顺序得到正确答案?
任何帮助都会非常感激!
答案 0 :(得分:2)
R的回收规则产生了差异。您组合的两个数据框的长度不同。为了显示差异,我使用了以下两个数据框,它们唯一的区别是rbind的顺序:
# dput() listing for `total2`: 24 rows with a factor `id`, a numeric `length`
# and a character `cat`. The 16 "Click" rows come first, followed by the
# 8 "Test" rows.
# NOTE(review): the `id` level set contains the literal "id" -- presumably a
# header line was read in as data; verify how the tables were imported.
dput(total2)
structure(list(id = structure(c(1L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 9L, 10L, 11L, 12L,
13L, 14L, 15L), .Label = c("1", "10", "11", "12", "13", "14",
"15", "16", "2", "3", "4", "5", "6", "7", "8", "9", "id"), class = "factor"),
length = c(4, 5, 3, 2, 2, 2, 2, 3, 1, 2, 3, 3, 3, 2, 2, 4,
3, 2, 3, 4, 5, 2, 4, 2), cat = c("Click", "Click", "Click",
"Click", "Click", "Click", "Click", "Click", "Click", "Click",
"Click", "Click", "Click", "Click", "Click", "Click", "Test",
"Test", "Test", "Test", "Test", "Test", "Test", "Test")), .Names = c("id",
"length", "cat"), row.names = c("2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "21",
"31", "41", "51", "61", "71", "81", "91"), class = "data.frame")
# dput() listing for `total`: 24 rows with a factor `id`, a numeric `length`
# and a character `cat`. The 8 "Test" rows come first, followed by the
# 16 "Click" rows.
# NOTE(review): the `id` level set contains the literal "id" -- presumably a
# header line was read in as data; verify how the tables were imported.
dput(total)
structure(list(id = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 17L, 10L, 11L, 12L, 13L, 14L,
15L, 16L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"id", "10", "11", "12", "13", "14", "15", "16", "9"), class = "factor"),
length = c(2, 1, 2, 3, 4, 1, 3, 1, 3, 4, 2, 1, 1, 1, 1, 2,
6, 1, 2, 2, 2, 1, 1, 3), cat = c("Test", "Test", "Test",
"Test", "Test", "Test", "Test", "Test", "Click", "Click",
"Click", "Click", "Click", "Click", "Click", "Click", "Click",
"Click", "Click", "Click", "Click", "Click", "Click", "Click"
)), .Names = c("id", "length", "cat"), row.names = c("2",
"3", "4", "5", "6", "7", "8", "9", "21", "31", "41", "51", "61",
"71", "81", "91", "10", "11", "12", "13", "14", "15", "16", "17"
), class = "data.frame")
然后我创建了两个面板图:
# Bar chart of `length` per `id` for the Click-then-Test data frame, with one
# facet row per dataset plus an "(all)" margin panel.
# The heights are already present in `length`, so draw them with a bar geom
# and the identity stat rather than the binning histogram geom. Aesthetics
# are mapped by column name so that each facet receives its own rows.
ggplot(total2, aes(x = id, y = length, fill = cat)) +
  geom_bar(stat = "identity") +
  facet_grid(cat ~ ., margins = TRUE, scales = "free")
# Histogram of `length` for the Test-then-Click data frame, with one facet
# row per dataset plus an "(all)" margin panel.
# Keep the grouping factor as a column of `total` and map aesthetics by
# column name: vectors taken from outside `data` (such as `total$length`) are
# not split or duplicated together with the rows when facet_grid() builds the
# panels and margins, which would make the plot depend on row order.
sessions <- as.factor(total$cat)
total$sessions <- sessions
ggplot(total, aes(x = length, fill = sessions)) +
  geom_histogram(binwidth = 0.2) +
  facet_grid(cat ~ ., margins = TRUE, scales = "free")