如何编写一个将long转换为wide的公式方法

时间:2018-05-22 13:45:34

标签: r class methods tidyr reshape2

twoway package中,我有一个twoway.default()方法,它采用矩阵或数据框并应用Tukey的方法来分析双向表。

示例:

> data(taskRT)
> taskRT
       topic1 topic2 topic3 topic4
Easy     2.43   3.12   3.68   4.04
Medium   3.41   3.91   4.07   5.10
Hard     4.21   4.65   5.87   5.69
> twoway(taskRT)

Mean decomposition (Dataset: "taskRT")
Residuals bordered by row effects, column effects, and overall

         topic1    topic2    topic3    topic4      roweff   
       + --------- --------- --------- --------- + ---------
Easy   | -0.055833  0.090833  0.004167 -0.039167 : -0.864167
Medium |  0.119167  0.075833 -0.410833  0.215833 : -0.059167
Hard   | -0.063333 -0.166667  0.406667 -0.176667 :  0.923333
       + ......... ......... ......... ......... + .........
coleff | -0.831667 -0.288333  0.358333  0.761667 :  4.181667

我希望使用一个公式方法扩展它,该公式方法采用数据框和response ~ row + column形式的公式,将其从long变为宽,然后调用默认方法。我知道有几种方法可以直接在控制台中执行此操作,但我似乎无法在公式方法函数中使用它们。

因此,对于长格式的数据,名为RT的单元格值以及tasktopic的行和列变量,我希望得到调用

时的结果相同
twoway(RT ~ task + topic, data=long)

在顶层,在控制台中,我可以通过各种方式执行此操作,从相同数据的long版本开始。

library(reshape2)
long <- melt(as.matrix(taskRT))
colnames(long) <- c("task", "topic", "RT")

转换回宽格式,并在其上调用twoway()

# convert wide to long: dcast
(wide <- dcast(long, task ~ topic, value.var="RT"))
twoway(wide[,-1])

# tidyr::spread
library(tidyr)
(wide <- spread(long, key=topic, value=RT))
twoway(wide[,-1])

# base, unstack
wide <- unstack(long, form = RT ~ topic)
rownames(wide) <- unique(long$task)
twoway(wide)

以下是twoway.formula方法的初始草图。我遇到的问题是我无法弄清楚如何使用解析公式对象的结果和函数中的相关数据框来在函数中构造调用,从而产生一个宽矩阵或适合传递给默认方法的数据框。到目前为止,我已经在函数中尝试了各种形式的dcast,显示为评论,但没有一个给我带来快乐。

#' Initial sketch for a twoway formula method
#'
#' Doesn't do anything useful yet, but the idea is to be able to use a
#' formula for a twoway table in long form, e.g.,
#' twoway(response ~ row + col, data=mydata)
#'
#' @param formula A formula of the form \code{response ~ rowvar + colVAR}
#' @param data The name of the data set
#' @param subset An expression to subset the data (unused)
#' @param na.action What to do with NAs? (unused)
#' @param ... other arguments, passed down
#' @importFrom stats terms
#'
twoway.formula <- function(formula, data, subset, na.action, ...) {

  if (missing(formula) || !inherits(formula, "formula"))
    stop("'formula' missing or incorrect")
  if (length(formula) != 3L)
    stop("'formula' must have both left and right hand sides")
  tt <- if (is.data.frame(data))
    terms(formula, data = data)
  else terms(formula)
  if (any(attr(tt, "order") > 1))
    stop("interactions are not allowed")

  rvar <- attr(terms(formula[-2L]), "term.labels")
  lvar <- attr(terms(formula[-3L]), "term.labels")
  rhs.has.dot <- any(rvar == ".")
  lhs.has.dot <- any(lvar == ".")
  if (lhs.has.dot || rhs.has.dot)
    stop("'formula' has '.' in left or right hand sides")
  m <- match.call(expand.dots = FALSE)
  edata <- eval(m$data, parent.frame())
  lhs <- formula[[2]]
  rhs <- formula[[3]]

  #  wide <- dcast(data=edata, formula=as.formula(rhs), value.var=lhs )
  #  wide <- dcast(data=edata, value.var=lhs)
  #  wide <- dcast(data=edata, rvar[1] ~ rvar[2], value.var=cvar)
  #  wide <- dcast(data=edata, list(.(rvar[1], .(rvar[2], .(cvar)))))
#browser()
  stop("The formula method is not yet implemented.")

  # call the default method on the wide data set
  twoway(wide)
}

有人可以帮忙吗?

1 个答案:

答案 0 :(得分:1)

使用tidyverse ...

library(tibble)
library(tidyr)
library(dplyr)

twoway.formula <- function(formula, data, subset, na.action, ...) {

  if (missing(formula) || !inherits(formula, "formula"))
    stop("'formula' missing or incorrect")
  if (length(formula) != 3L)
    stop("'formula' must have both left and right hand sides")
  tt <- if (is.data.frame(data)) {
    terms(formula, data = data)
  } else { terms(formula) }
  if (any(attr(tt, "order") > 1))
    stop("interactions are not allowed")

  rvar <- attr(terms(formula[-2L]), "term.labels")
  lvar <- attr(terms(formula[-3L]), "term.labels")
  rhs.has.dot <- any(rvar == ".")
  lhs.has.dot <- any(lvar == ".")
  if (lhs.has.dot || rhs.has.dot)
    stop("'formula' has '.' in left or right hand sides")
  m <- match.call(expand.dots = FALSE)
  edata <- eval(m$data, parent.frame())
  lhs <- formula[[2]]
  rhs <- formula[[3]]

  wide <- 
    edata %>% 
    select(one_of(rvar, lvar)) %>% 
    spread(key = rvar[2], value = lvar) %>% 
    column_to_rownames(rvar[1])

  # call the default method on the wide data set
  twoway(wide)
}


library(twoway)
data(taskRT)

library(reshape2)
long <- melt(as.matrix(taskRT))
colnames(long) <- c("task", "topic", "RT")

twoway(taskRT)

twoway(RT ~ task + topic, data = long)