当我尝试使用 dplyr 的 select() 来减少数据框的列数时,遇到了一件奇怪的事:我只指定了三列,结果却总是得到四列。除了让我回想起《星际迷航》中"Chain of Command"那一集之外,我觉得这种行为很奇怪,也不确定该如何解决。另外,为什么会这样呢?
这是我的数据框,记录了每天各个时间段内事件发生的次数。虽然只有 6 行数据,但其结构输出仍然很冗长。
library(dplyr)
library(tidyr)
# Reproducible example data: a 6-row grouped tibble (class "grouped_df") with
# columns Day, Number, time, Start (POSIXct, tzone "UTC") and n.
# The grouping variables recorded in the `vars` attribute are Day, Number and
# time, so every row forms its own group (all group_sizes are 1).
# The outer `labels` attribute itself carries `indices` 0-52 and a nested
# 53-row `labels` data frame -- presumably metadata left over from the larger
# data set this sample was cut from; it is not needed for the 6 rows shown.
# NOTE(review): the vars/indices/labels attribute layout looks like the format
# written by older dplyr releases -- confirm it loads under the installed version.
test <- structure(list(Day = c("Dec 10", "Dec 10", "Dec 10", "Dec 10",
"Dec 11", "Dec 11"), Number = c(10L, 10L, 10L, 10L, 11L, 11L),
time = c("08:30", "12:00", "15:30", "19:00", "08:30", "12:00"
), Start = structure(c(1544430600, 1544443200, 1544455800,
1544468400, 1544517000, 1544529600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), n = c(29L, 74L, 20L, 26L, 29L,
32L)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), vars = c("Day", "Number", "time"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L), group_sizes = c(1L, 1L, 1L, 1L,
1L, 1L), biggest_group_size = 1L, labels = structure(list(Day = c("Dec 10",
"Dec 10", "Dec 10", "Dec 10", "Dec 11", "Dec 11"), Number = c(10L,
10L, 10L, 10L, 11L, 11L), time = c("08:30", "12:00", "15:30",
"19:00", "08:30", "12:00")), class = "data.frame", row.names = c(NA,
-6L), vars = c("Day", "Number", "time"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Day = c("Dec 10", "Dec 10", "Dec 10", "Dec 10", "Dec 11",
"Dec 11", "Dec 11", "Dec 11", "Dec 12", "Dec 12", "Dec 12",
"Dec 12", "Dec 13", "Dec 13", "Dec 13", "Dec 13", "Dec 14",
"Dec 14", "Dec 14", "Dec 14", "Dec 15", "Dec 15", "Dec 15",
"Dec 17", "Dec 17", "Dec 17", "Dec 17", "Dec 18", "Dec 18",
"Dec 18", "Dec 18", "Dec 19", "Dec 19", "Dec 19", "Dec 4",
"Dec 4", "Dec 4", "Dec 4", "Dec 5", "Dec 5", "Dec 5", "Dec 5",
"Dec 6", "Dec 6", "Dec 6", "Dec 6", "Dec 7", "Dec 7", "Dec 7",
"Dec 7", "Dec 8", "Dec 8", "Dec 8"), Number = c(10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 17L, 17L, 17L,
17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L),
time = c("08:30", "12:00", "15:30", "19:00", "08:30", "12:00",
"15:30", "19:00", "08:30", "12:00", "15:30", "19:00", "08:30",
"12:00", "15:30", "19:00", "08:30", "12:00", "15:30", "19:00",
"08:30", "12:00", "15:30", "08:30", "12:00", "15:30", "19:00",
"08:30", "12:00", "15:30", "19:00", "08:30", "12:00", "15:30",
"08:30", "12:00", "15:30", "19:00", "08:30", "12:00", "15:30",
"19:00", "08:30", "12:00", "15:30", "19:00", "08:30", "12:00",
"15:30", "19:00", "08:30", "12:00", "15:30")), class = "data.frame", row.names = c(NA,
-53L), vars = c("Day", "Number", "time"), drop = TRUE)))
可以看到,我在 select() 中只列出了 3 个变量,但输出中却奇怪地多出了其他变量。而且,无论是直接选择特定变量,还是用减号排除其他变量,都不起作用。
# Request only three columns from the example data frame.
select(test, Day, time, n)
答案 0 :(得分:1)
在选择变量之前,应先对 tibble 取消分组(ungroup),如此处《"Adding missing grouping variables" message in dplyr in R》所述:
不取消分组:
# Select three columns while the data frame is still grouped.
select(test, Day, time, n)
> test %>%
+ select(Day, time, n)
Adding missing grouping variables: `Number`
# A tibble: 6 x 4
# Groups: Day, Number, time [6]
Number Day time n
<int> <chr> <chr> <int>
1 10 Dec 10 08:30 29
2 10 Dec 10 12:00 74
3 10 Dec 10 15:30 20
4 10 Dec 10 19:00 26
5 11 Dec 11 08:30 29
6 11 Dec 11 12:00 32
取消分组
# Remove the grouping first, then select the three requested columns.
select(ungroup(test), Day, time, n)
> test %>%
+ ungroup() %>%
+ select(Day, time, n)
# A tibble: 6 x 3
Day time n
<chr> <chr> <int>
1 Dec 10 08:30 29
2 Dec 10 12:00 74
3 Dec 10 15:30 20
4 Dec 10 19:00 26
5 Dec 11 08:30 29
6 Dec 11 12:00 32
答案 1 :(得分:0)
将数据框取消分组可以解决所有问题。