hth1是我已经拥有的数据框。
> hth1
Source: local data frame [13 x 14]
Groups: team [13]
team CSK DC DD GL KKR KTK KXIP MI PW RCB RPSG
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CSK 0 8 11 0 11 2 9 10 4 10 0
2 DC 2 0 8 0 2 1 7 5 3 8 0
3 DD 5 3 0 0 7 2 8 5 2 10 2
4 GL 0 0 2 0 0 0 0 0 0 1 0
5 KKR 5 7 10 2 0 0 5 10 3 15 0
6 KTK 0 0 0 0 2 0 1 0 1 2 0
7 KXIP 8 3 10 2 14 0 0 11 2 6 1
8 MI 12 5 13 2 8 1 7 0 3 11 1
9 PW 2 1 4 0 2 0 4 3 0 1 0
10 RCB 9 3 7 2 3 0 12 8 4 0 1
11 RPSG 0 0 0 2 2 0 1 1 0 1 0
12 RR 8 2 7 0 14 1 7 6 2 7 0
13 SH 3 0 4 0 5 0 4 5 2 5 2
# ... with 2 more variables: RR <dbl>, SH <dbl>
为什么bind_rows()返回的数据框与原始数据框不同?
> h <- list(hth1)
> hth_b1 <- bind_rows(h)
> identical(hth1, hth_b1)
[1] FALSE
> class(hth_b1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> class(hth1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> setequal(hth1, hth_b1)
TRUE
> anti_join(hth1, hth_b1)
Joining, by = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")
Source: local data frame [0 x 14]
Groups: team [13]
# ... with 14 variables: team <chr>, CSK <dbl>, DC <dbl>, DD <dbl>, GL <dbl>,
# KKR <dbl>, KTK <dbl>, KXIP <dbl>, MI <dbl>, PW <dbl>, RCB <dbl>,
# RPSG <dbl>, RR <dbl>, SH <dbl>
我错过了什么?我被困在这里已经很久了。
应Benjamin的要求,我对两个数据框都运行了dput()。输出如下。
> dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
>
> dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
两者的输出存在差异:hth1多了一个额外的属性 drop = TRUE。
我不明白为什么它不存在于另一个中。
答案 0 :(得分:1)
可重现的例子:
# Reproducible example: pass a grouped data frame through bind_rows() and
# compare the result with the original object.
library(tidyverse)
# Group the built-in mtcars data by its cyl column.
test1 <- mtcars %>% group_by(cyl)
# Wrap the grouped data frame in a one-element list and bind it back together.
test2 <- bind_rows(list(test1))
# Strict comparison (attributes included) vs. dplyr's all_equal() comparison;
# the trailing comments record the results reported for each call.
identical(test1, test2) # FALSE
all_equal(test1, test2) # TRUE
您可以检查两者的attributes,会发现rownames不同:
rownames(test1)
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" [4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant" [7] "Duster 360" "Merc 240D" "Merc 230" [10] "Merc 280" "Merc 280C" "Merc 450SE" [13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood" [16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128" [19] "Honda Civic" "Toyota Corolla" "Toyota Corona" [22] "Dodge Challenger" "AMC Javelin" "Camaro Z28" [25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2" [28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino" [31] "Maserati Bora" "Volvo 142E"
rownames(test2)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" [14] "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" [27] "27" "28" "29" "30" "31" "32"
永远不要指望dplyr(tidyverse)会保留你的rownames,它们随时可能被悄悄丢弃。
答案 1 :(得分:0)
请原谅这个答案的格式。看起来其中一个对象带有标签(labels)相关的属性,而另一个没有。由于没有看到生成这些对象的代码,我无法判断标签是在哪一步被附加或删除的。我在下面的对象中加粗了差异。
注意:不将其格式化为代码是有意的选择。如果格式化为代码,我就无法用粗体标出结构中的差异。
dput(hth_b1) structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2, 5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3, 5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0, 7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11, 2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4, 12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10, 8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3, 9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L), class = "data.frame", vars = list(team), .Names = "team"))
dput(hth1) structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2, 5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3, 5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0, 7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11, 2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4, 12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10, 8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3, 9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA, -13L), vars = list(team), **drop = TRUE**, .Names = "team"), indices = list(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), **drop = TRUE**, group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
在下面的示例中,我先给mtcars数据框添加标签,然后将其传入bind_rows运行,您会看到标签不再存在。我认为您的数据也发生了同样的情况。
# Demonstration: attach labels to a copy of mtcars, run it through
# bind_rows(), then compare the result with the labelled original.
library(Hmisc)
mtcars2 <- mtcars
# Use the upper-cased column names as the label values.
# NOTE(review): self = FALSE presumably labels each column rather than the
# data frame itself -- confirm against the Hmisc::label documentation.
label(mtcars2, self = FALSE) <- toupper(names(mtcars))
library(dplyr)
mtcars3 <- bind_rows(mtcars2)
# Check whether the round trip through bind_rows() left the object unchanged.
identical(mtcars2, mtcars3)
# Inspect the labels on the bind_rows() result; the accompanying text reports
# that they are no longer present.
label(mtcars3)