使用参数计算R函数,参数是数据框的列

时间:2012-10-09 06:26:18

标签: r

我有以下代码。

completemodel <- function(model, colnum)
{
  modlst = c()
  tuplenum = length(model)
  if(tuplenum != 0)
    for(i in 1:tuplenum)
      modlst = c(modlst, model[[i]])
  index = seq(0, colnum-1)
  inddiff = setdiff(index, modlst)
  inddifflen = length(inddiff)
  for(i in seq(length.out=inddifflen))
    model = append(model, inddiff[i])
  return(model)
}

##   Calculate number of parameters in model.
numparam <- function(mod, colnum)
  {
    library(RJSONIO)
    mod = fromJSON(mod)
    mod = completemodel(mod, colnum)
    totnum = 0
    for(tup in mod)
      totnum = totnum +(4**length(tup))
    return(totnum)
  }

x = cbind.data.frame(rownum=c(100, 100), colnum=c(10, 20), modeltrue=c("[]", "[]"), modelresult=c("[[1,2]]","[[1,3]]"), stringsAsFactors=FALSE)

> x
  rows colnum modeltrue modelresult
1  100     10        []     [[1,2]]
2  100     20        []     [[1,3]]

如何操作x给我一个如下所示的数据框?当然在这里 我的意思是例如当我写numparam("[]", 10)时,numparam("[]", 10)

  rownum   colnum    numparam_modeltrue   numparam_modelresult
  100        10      numparam("[]", 10)   numparam("[[1,2]]", 10)
  100        20      numparam("[]", 20)   numparam("[[1,3]]", 20)

某些版本的apply函数可能有效,但我在找到合适的配方时遇到了问题。

更新:似乎如果rownnumcolnum元组不是唯一的,那么可以执行以下操作。

x = cbind.data.frame(id=c(1, 2, 3), rownum=c(100, 100, 100), colnum=c(10, 20, 20), modeltrue=c("[]", "[]", "[]"),
  modelresult=c("[[1,2]]","[[1,3]]","[[1,3, 4]]"), stringsAsFactors = FALSE)

##Then, create a data.table and set the key

library(data.table)
xDT <- as.data.table(x)
setkeyv(xDT, c("id", "rownum", "colnum")

这是正确的方法吗?

3 个答案:

答案 0 :(得分:3)

如果您对其开放,则可以使用data.table包。

首先,创建一个data.table,添加一个唯一标识符列id并将其设置为键

library(data.table)
xDT <- as.data.table(x)
xDT[, id := seq_len(nrow(xDT))]
setkey(xDT, "id")

然后,使用do.call,您可以在相应的列上运行numparam函数:

res1 <- xDT[, list(numparam_modeltrue = do.call(numparam, unname(.SD))),
  .SDcols = c(3, 2), by = key(xDT)]
res2 <- xDT[, list(numparam_modelresult = do.call(numparam, unname(.SD))),
  .SDcols = c(4, 2), by = key(xDT)]

然后将结果合并到data.table

xDT[res1][res2][, c("modeltrue", "modelresult") := NULL, with = FALSE]
   id rownum colnum numparam_modeltrue numparam_modelresult
1:  1    100     10                 40                   48
2:  2    100     20                 80                   88

修改

正如Matthew Dowle建议的那样,你最终可以达到相同的结果而不是最后的结果:

xDT[,numparam_modeltrue := do.call(numparam, unname(.SD)),
  .SDcols = c(3, 2), by = key(xDT)]
xDT[,numparam_modelresult := do.call(numparam, unname(.SD)),
  .SDcols = c(4, 2), by = key(xDT)]

如果你想删除列modeltruemodelresult

xDT[,c("modeltrue", "modelresult") := NULL, with = FALSE]
# NOTE that with = FALSE shouldn't be necessary with data.table 1.8.3
# But I'm still with 1.8.2

答案 1 :(得分:1)

以下代码的工作原理。不过,它不是很漂亮。欢迎提出改进建议。特别是, 不必转置矩阵并添加列名称也是很好的,而且,因为它返回一个矩阵,所以仍然 整数转换为字符串的烦人问题。感谢flodel 有关his answer to "Pass arguments to a function from each row of a matrix"的提示。

completemodel <- function(model, colnum)
{
  modlst = c()
  tuplenum = length(model)
  if(tuplenum != 0)
    for(i in 1:tuplenum)
      modlst = c(modlst, model[[i]])
  index = seq(0, colnum-1)
  inddiff = setdiff(index, modlst)
  inddifflen = length(inddiff)
  for(i in seq(length.out=inddifflen))
    model = append(model, inddiff[i])
  return(model)
}

##   Calculate number of parameters in model.
numparam <- function(mod, colnum)
  {
    library(RJSONIO)
    mod = fromJSON(mod)
    print(paste("mod", mod))
    mod = completemodel(mod, colnum)
    totnum = 0
    for(tup in mod)
      totnum = totnum +(4**length(tup))
    return(totnum)
  }

numparamvec <- function(rownum, colnum, modeltrue, modelresult)
  {
    totnum1 = numparam(modeltrue, as.integer(colnum))
    totnum2 = numparam(modelresult, as.integer(colnum))
    return(c(rownum, colnum, totnum1, totnum2))
  }

x = cbind.data.frame(rownum=c(100, 100), colnum=c(10, 20), modeltrue=c("[]", "[]"), modelresult=c("[[1,2]]","[[1,3]]"), stringsAsFactors=FALSE)
val = t(apply(x, 1, function(x)do.call(numparamvec, as.list(x))))
colnames(val) = c("rownum", "colnum", "numparam_modeltrue", "numparam_modelresult")

答案 2 :(得分:1)

使用sapply的替代方法:

numparamvec <- function(rownum, colnum, modeltrue, modelresult)
  {
    totnum1 = numparam(modeltrue, as.integer(colnum))
    totnum2 = numparam(modelresult, as.integer(colnum))
    return(c(rownum = rownum, colnum = colnum,
      numparam_modeltrue = totnum1, numparam_modelresult = totnum2))
  }

val <- sapply(seq_len(nrow(x)),
  function(y) do.call(numparamvec, x[y, ]))

> as.data.frame(t(val))
  rownum colnum numparam_modeltrue numparam_modelresult
1    100     10                 40                   48
2    100     20                 80                   88

使用vapply的替代方法:

val <- t(vapply(seq_len(nrow(x)), function(y) do.call(numparamvec, x[y, ]),
  c(rownum = 0, colnum = 0, numparam_modeltrue = 0, numparam_modelresult = 0)))

> val
     rownum colnum numparam_modeltrue numparam_modelresult
[1,]    100     10                 40                   48
[2,]    100     20                 80                   88