如何在保持列名称顺序的同时扩展tidyr :: spread()?

时间:2017-09-24 13:09:56

标签: r tidyr tidyverse

在使用扩散函数时如何在维持数字等级时进行扩展?

library(tidyverse)

data.frame(time = paste0("t_", 1:100)) %>% 
  rowwise() %>% 
  mutate(rnd = sample(1:100, size=1)) %>% 
  spread(time, rnd)

上面显示的代码执行结果的列名是t_1, t_11, t_100, ....。 我想按数字(t_1, t_2, t_3, ...)的顺序获取列名。

2 个答案:

答案 0 :(得分:2)

您可以尝试两件事:

(1)制作"时间"水平与您想要的订单匹配的因素:

data.frame(time = factor(paste0("t_", 1:100), levels = paste0("t_", 1:100))) %>% 
  rowwise() %>% 
  mutate(rnd = sample(1:100, size=1)) %>% 
  spread(time, rnd)

(2)使用select语句强制执行订单:

data.frame(time = paste0("t_", 1:100)) %>% 
  rowwise() %>% 
  mutate(rnd = sample(1:100, size=1)) %>% 
  spread(time, rnd) %>% 
  select(paste0("t_", 1:100))

答案 1 :(得分:0)

这是一个保留列顺序的新函数。只需要进行一处小改动(参见注释):

my_spread <- function (data, key, value, fill = NA, convert = FALSE, drop = TRUE, 
          sep = NULL) {
  key_col <- tidyr:::col_name(substitute(key))
  value_col <- tidyr:::col_name(substitute(value))
  tbl_df(my_spread_(data, key_col, value_col, fill = fill, convert = convert, 
                    drop = drop, sep = sep))
}

my_spread_ <- function (data, key_col, value_col, fill = NA, convert = FALSE, 
                       drop = TRUE, sep = NULL) {
  col <- data[key_col]
  #col_id <- tidyr:::id(col, drop = drop)                                   # Old line
  col_id <- seq_len(nrow(data))                                             # New line 1
  attr(col_id, 'n') <- nrow(data)                                           # New line 2
  col_labels <- tidyr:::split_labels(col, col_id, drop = drop)
  rows <- data[setdiff(names(data), c(key_col, value_col))]
  if (length(rows) == 0) {
    row_id <- structure(1L, n = 1L)
    row_labels <- as.data.frame(matrix(nrow = 1, ncol = 0))
  }
  else {
    row_id <- id(rows, drop = drop)
    row_labels <-  tidyr:::split_labels(rows, row_id, drop = drop)
    rownames(row_labels) <- NULL
  }
  overall <- tidyr:::id(list(col_id, row_id), drop = FALSE)
  n <- attr(overall, "n")
  if (anyDuplicated(overall)) {
    groups <- split(seq_along(overall), overall)
    groups <- groups[vapply(groups, length, integer(1)) > 
                       1]
    str <- vapply(
      groups, 
      function(x) paste0("(", paste0(x, collapse = ", "), ")"), character(1)
    )
    stop("Duplicate identifiers for rows ", paste(str, collapse = ", "), 
         call. = FALSE)
  }
  if (length(overall) < n) {
    overall <- match(seq_len(n), overall, nomatch = NA)
  }
  else {
    overall <- order(overall)
  }
  value <- data[[value_col]]
  ordered <- value[overall]
  if (!is.na(fill)) {
    ordered[is.na(ordered)] <- fill
  }
  if (convert && !is.character(ordered)) {
    ordered <- as.character(ordered)
  }
  dim(ordered) <- c(attr(row_id, "n"), attr(col_id, "n"))
  colnames(ordered) <- enc2utf8( tidyr:::col_names(col_labels, sep = sep))
  ordered <- tidyr:::as_data_frame_matrix(ordered)
  if (convert) {
    ordered[] <- lapply(ordered, type.convert, as.is = TRUE)
  }
  tidyr:::append_df(row_labels, ordered)
}