使用R:apply创建n(%)表删除rownames

时间:2013-12-02 13:03:02

标签: r apply

与许多人一样,我需要能够使用n(%)格式制作描述性表格。我已经写了一些R代码(见下文)来做这件事,它几乎按照我希望的方式工作。

我对代码的主要问题是apply函数从frq表/矩阵中剥离Gender的标签和rownames。我可以而且确实把它们放回去但是想知道是否有某种方法可以避免这种情况。

我采用n(%)格式输出的方法看似简单直接,但我也有兴趣看到其他方法来获取我的代码产生的输出类型。

#### Sample data ####

rashData <-
structure(list(Gender = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Male", "Female"), class = "factor"),
    RashGrp = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L), .Label = c("Yes", "No"), class = "factor")), .Names = c("Gender",
"RashGrp"), row.names = c(NA, 207L), class = "data.frame")

表格代码####

frq <- with(rashData, table(Gender, RashGrp, dnn=c("Gender", "Rash")))
frq <- addmargins(frq, FUN = list(list(Total = sum)), quiet = TRUE)

den  <- which(rownames(frq)!= "Total")
npct <- function(x)
    paste(format(x), " (", format( x/sum(x[den])*100, digits=4, nsmall=2 ), "%)", sep="")

tablObj <- apply(frq, 2, npct)
names(dimnames(tablObj))[1] <- names(dimnames(frq))[1]
rownames(tablObj) <- rownames(frq)

tablObj

3 个答案:

答案 0 :(得分:1)

希望我这样做是正确的。考虑修改我的问题。简单地提供一个建议的答案似乎有点自然。

我收到的三个回复都非常有帮助。托马斯建议使用sprintf让我简化了格式化代码。

从詹姆斯那里,我了解了属性功能。正如我原来的问题所示,我一直在使用诸如rownames,colnames和names(dimnames())之类的东西来完成这个函数的工作。显然,属性功能要快得多,所以我很高兴能够了解它。

DWin的反应也很有帮助。以他建议的方式应用npct函数不会产生正确的百分比。回想起来,我希望我在原始问题中包含了我的代码生成的输出。这会让事情变得更清楚,而且我将来需要更加小心。他指出的[]的使用是非常有价值的,并确实使其成为提出的解决方案的一部分。

建议的解决方案粘贴在下面。即使詹姆斯和德温对此表示权衡,它也会使用申请。我不认为申请的使用存在任何本质上的错误,但可以对此提出反馈意见。我也很乐意收到对代码的任何改进建议。

谢谢,

#### Sample data ####

rashData <-
structure(list(Gender = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Male", "Female"), class = "factor"),
    RashGrp = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L), .Label = c("Yes", "No"), class = "factor")), .Names = c("Gender",
"RashGrp"), row.names = c(NA, 207L), class = "data.frame")

#### Table code ####

frq <- with(rashData, table(Gender, RashGrp, dnn=c("Gender", "Rash")))
frq <- addmargins(frq, FUN = list(list(Total = sum)), quiet = TRUE)
npct <- function(x) sprintf("%3.0f%s%*.2f%s", x, " (", 6, x/sum(x[rownames(frq)!="Total"])*100, "%)")
frq[] <- apply(frq, 2, npct)
frq

        Rash
Gender   Yes           No            Total        
  Male    30 ( 71.43%)  85 ( 51.52%) 115 ( 55.56%)
  Female  12 ( 28.57%)  80 ( 48.48%)  92 ( 44.44%)
  Total   42 (100.00%) 165 (100.00%) 207 (100.00%)

答案 1 :(得分:0)

问题来自paste,而不是apply来电。但是,您可以使用apply来避免prop.table,然后从attributes复制frq

fmtTbl <- paste0(format(frq), "(", format(rbind(prop.table(frq[den,],2)*100,rep(100,3)),
 digits=4, nsmall=2), "%)")
attributes(fmtTbl) <- attributes(frq)
fmtTbl
        Rash
Gender   Yes           No            Total        
  Male    30 ( 71.43%)  85 ( 51.52%) 115 ( 55.56%)
  Female  12 ( 28.57%)  80 ( 48.48%)  92 ( 44.44%)
  Total   42 (100.00%) 165 (100.00%) 207 (100.00%)

答案 2 :(得分:0)

是:跳过apply并简单地将表对象传递给npct,并使用[]返回对象以保留结构:

frq <- with(rashData, table(Gender, RashGrp, dnn=c("Gender", "Rash")))
mfrq <- addmargins(frq, FUN = list(list(Total = sum)), quiet = TRUE)
npct <- function(x)
    paste(format(x), " (", format( x/sum(frq)*100, digits=4, nsmall=2 ), "%)", sep="")

mfrq[] <-  npct(mfrq)
mfrq
#----------------
        Rash
Gender   Yes            No             Total         
  Male    30 ( 14.493%)  85 ( 41.063%) 115 ( 55.556%)
  Female  12 (  5.797%)  80 ( 38.647%)  92 ( 44.444%)
  Total   42 ( 20.290%) 165 ( 79.710%) 207 (100.000%)