创建摘要功能

Question

我正在寻找一种方法，为函数中的不同变量使用不同的比例。

这是来自A simpler way to achieve a frequency count with mean, sum, length and sd in R

的后续问题

鉴于

    # create the summary function
    summaryStatistics <- function(x,levels) {
        xx <- na.omit(x)
        c(table(factor(x, levels=levels), useNA='always', exclude=NULL),
          sum=sum(xx), 
          length=length(x), 
          mean=mean(xx), 
          standard.deviation=sqrt(var(xx)), 
          var=(var(xx)), 
          median=median(xx), 
          min=min(xx), 
          max=max(xx),
          quantile=quantile(xx),
          skew=sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x) ,
          kurtosis=sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3 
        )
    }

    # create the test data frame
    Id <- c(1,2,3,4,5,6,7,8,9,10)
    ClassA <- c(1,NA,3,1,1,2,1,4,5,3)
    ClassB <- c(2,1,1,3,3,2,1,1,3,3)
    R <- c(1,2,3,NA,9,2,4,5,6,7)
    S <- c(3,7,NA,9,5,8,7,NA,7,6)
    df <- data.frame(Id,ClassA,ClassB,R,S)

    ClassAAnswers <- c(1:5,NA)
    ClassBAnswers <- c(1:5,NA)
    RAnswers <- c(0:10,NA);
    SAnswers <- c(0:20,NA);

    # create the result
    result <- setNames(
        nm=c('answer','question','value'),
        as.data.frame(
            as.table(
                simplify2array(
                    lapply(
                        df[c('R', 'S')], 
                        summaryStatistics,
                        RAnswers
                    )
                )
            )
        )
    )

    # change the order to question, answer, value
    result <- result[, c(2, 1, 3)]

    # add the filter
    result <- cbind(filter='None',result)

    # return the result
    result

我得到了

      filter question             answer      value
    1    None        R                  0  0.0000000
    2    None        R                  1  1.0000000
    3    None        R                  2  2.0000000
    4    None        R                  3  1.0000000
    5    None        R                  4  1.0000000
    6    None        R                  5  1.0000000
    7    None        R                  6  1.0000000
    8    None        R                  7  1.0000000
    9    None        R                  8  0.0000000
    10   None        R                  9  1.0000000
    11   None        R                 10  0.0000000
    12   None        R               <NA>  1.0000000
    13   None        R                sum 39.0000000
    14   None        R             length 10.0000000
    15   None        R               mean  4.3333333
    16   None        R standard.deviation  2.6457513
    17   None        R                var  7.0000000
    18   None        R             median  4.0000000
    19   None        R                min  1.0000000
    20   None        R                max  9.0000000
    21   None        R        quantile.0%  1.0000000
    22   None        R       quantile.25%  2.0000000
    23   None        R       quantile.50%  4.0000000
    24   None        R       quantile.75%  6.0000000
    25   None        R      quantile.100%  9.0000000
    26   None        R               skew  0.3275692
    27   None        R           kurtosis -1.5333333
    28   None        S                  0  0.0000000
    29   None        S                  1  0.0000000
    30   None        S                  2  0.0000000
    31   None        S                  3  1.0000000
    32   None        S                  4  0.0000000
    33   None        S                  5  1.0000000
    34   None        S                  6  1.0000000
    35   None        S                  7  3.0000000
    36   None        S                  8  1.0000000
    37   None        S                  9  1.0000000
    38   None        S                 10  0.0000000
    39   None        S               <NA>  2.0000000
    40   None        S                sum 52.0000000
    41   None        S             length 10.0000000
    42   None        S               mean  6.5000000
    43   None        S standard.deviation  1.8516402
    44   None        S                var  3.4285714
    45   None        S             median  7.0000000
    46   None        S                min  3.0000000
    47   None        S                max  9.0000000
    48   None        S        quantile.0%  3.0000000
    49   None        S       quantile.25%  5.7500000
    50   None        S       quantile.50%  7.0000000
    51   None        S       quantile.75%  7.2500000
    52   None        S      quantile.100%  9.0000000
    53   None        S               skew -0.4252986
    54   None        S           kurtosis -1.3028646

S的答案从0到10缩放。

我认为关键是拉扯。

lapply(df[c('R', 'S')], summaryStatistics, c(0:20))

为R和S生成缩放0到20的结果。

lapply(df[c('R', 'S')], summaryStatistics, c(0:10))

为R和S生成从0到10的结果。

lapply(df[c('R', 'S')], summaryStatistics, c(0:20,0:10))

在第一个比例中给出结果，在第二个比例中给出结果，但有一些警告。

警告讯息：

1：在levels<-（*tmp*，值= if（nl == nL）as.character（标签），否则粘贴0（标签，：不推荐使用因子中的重复级别

2：在levels<-（*tmp*，值= if（nl == nL）as.character（标签），否则粘贴0（标签，：不推荐使用因子中的重复级别

3：在levels<-（*tmp*，值= if（nl == nL）as.character（标签），否则为paste0（标签，：不推荐使用因子中的重复级别

4：在levels<-（*tmp*，值= if（nl == nL）as.character（标签），否则粘贴0（标签，：不推荐使用因子中的重复级别

如何更改摘要功能，以便我可以传递R的比例和S的比例，并为每个变量获得一组比例结果？

Answer 1

这是调整原始功能的非常粗略的尝试：

summaryStatistics <- function(df, a, b, levels1, levels2) {
    x <- df[,a]
    y <- df[,b]
    xx <- na.omit(x)
    yy <- na.omit(y)
    levels2 <- levels2[levels2 != 0]
    answer1 <- c(levels1, "<NA>", "sum", "length", "mean", "standard.deviation", "var", "median", "min", "max", "quantile.0", "quantile.25", "quantile.50", "quantile.75", "quantile.100", "skew", "kurtosis")
    value1 <- c(as.numeric(table(factor(x, levels1))), nrow(df[is.na(x)==T,]), sum(xx), length(x), mean(xx), sqrt(var(xx)), (var(xx)), median(xx), min(xx), max(xx), as.numeric(quantile(xx)), sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x), sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3)
    answer2 <- c(levels2, "<NA>", "sum", "length", "mean", "standard.deviation", "var", "median", "min", "max", "quantile.0", "quantile.25", "quantile.50", "quantile.75", "quantile.100", "skew", "kurtosis")
    value2 <- c(as.numeric(table(factor(y, levels2))), nrow(df[is.na(y)==T,]), sum(yy), length(y), mean(yy), sqrt(var(yy)), (var(yy)), median(yy), min(yy), max(yy), as.numeric(quantile(yy)), sum((yy-mean(yy))^3/sqrt(var(yy))^3)/length(y), sum((yy-mean(yy))^4/sqrt(var(yy))^4)/length(y) - 3)
    answer <- c(answer1, answer2)
    question <- c(rep(a, length(answer1)), rep(b, length(answer2)))
    value <- c(value1, value2)
    result <- data.frame(answer, question, value)
    return(result)
   }

用法是：

summaryStatistics(df, 'R', 'S', c(0:10), c(0:20))

它很难看，但最终结果也是如此：）

Answer 2

我找到了一个解决方法，我为每对问题和可能的答案调用了汇总函数。

创建摘要功能

    summaryStatistics <- function(x,levels) {
        xx <- na.omit(x)
        c(table(factor(x, levels=levels), useNA='always', exclude=NULL),
          sum=sum(xx), 
          length=length(x), 
          mean=mean(xx), 
          standard.deviation=sqrt(var(xx)), 
          var=(var(xx)), 
          median=median(xx), 
          min=min(xx), 
          max=max(xx),
          quantile=quantile(xx),
          skew=sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x) ,
          kurtosis=sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3
        )
    }

    # create the test data frame
    Id <- c(1,2,3,4,5,6,7,8,9,10)
    ClassA <- c(1,NA,3,1,1,2,1,4,5,3)
    ClassB <- c(2,1,1,3,3,2,1,1,3,3)
    R <- c(1,2,3,NA,9,2,4,5,6,7)
    S <- c(3,7,NA,9,5,8,7,NA,7,6)
    W <- c(4,5,6,7,2,4,5,6,7,8)
    df <- data.frame(Id,ClassA,ClassB,R,S,W)

    ClassAAnswers <- c(1:5,NA)
    ClassBAnswers <- c(1:5,NA)

    RAnswers <- c(0:10,NA);
    SAnswers <- c(0:20,NA);
    WAnswers <- c(0:30,NA);

    # create the result
    result <- setNames(
        nm=c('answer','question','value'),
        as.data.frame(
            as.table(
                simplify2array(
                    lapply(
                        df[c('R')], 
                        summaryStatistics,
                        RAnswers
                    )
                )
            )
        )
    )

    result <- rbind( result, 
        setNames(
                nm=c('answer','question','value'),
                as.data.frame(
                    as.table(
                        simplify2array(
                            lapply(
                                df[c('S')], 
                                summaryStatistics,
                                SAnswers
                            )
                        )
                    )
                )
        )
    )

    result <- rbind( result, 
        setNames(
                nm=c('answer','question','value'),
                as.data.frame(
                    as.table(
                        simplify2array(
                            lapply(
                                df[c('W')], 
                                summaryStatistics,
                                WAnswers
                            )
                        )
                    )
                )
        )
    )

    # change the order to question, answer, value
    result <- result[, c(2, 1, 3)]

    # add the filter
    result <- cbind(filter='None',result)

    # return the result
    result

返回

        filter question             answer      value
    1     None        R                  0  0.0000000
    2     None        R                  1  1.0000000
    3     None        R                  2  2.0000000
    4     None        R                  3  1.0000000
    5     None        R                  4  1.0000000
    6     None        R                  5  1.0000000
    7     None        R                  6  1.0000000
    8     None        R                  7  1.0000000
    9     None        R                  8  0.0000000
    10    None        R                  9  1.0000000
    11    None        R                 10  0.0000000
    12    None        R               <NA>  1.0000000
    13    None        R                sum 39.0000000
    14    None        R             length 10.0000000
    15    None        R               mean  4.3333333
    16    None        R standard.deviation  2.6457513
    17    None        R                var  7.0000000
    18    None        R             median  4.0000000
    19    None        R                min  1.0000000
    20    None        R                max  9.0000000
    21    None        R        quantile.0%  1.0000000
    22    None        R       quantile.25%  2.0000000
    23    None        R       quantile.50%  4.0000000
    24    None        R       quantile.75%  6.0000000
    25    None        R      quantile.100%  9.0000000
    26    None        R               skew  0.3275692
    27    None        R           kurtosis -1.5333333
    28    None        S                  0  0.0000000
    29    None        S                  1  0.0000000
    30    None        S                  2  0.0000000
    31    None        S                  3  1.0000000
    32    None        S                  4  0.0000000
    33    None        S                  5  1.0000000
    34    None        S                  6  1.0000000
    35    None        S                  7  3.0000000
    36    None        S                  8  1.0000000
    37    None        S                  9  1.0000000
    38    None        S                 10  0.0000000
    39    None        S                 11  0.0000000
    40    None        S                 12  0.0000000
    41    None        S                 13  0.0000000
    42    None        S                 14  0.0000000
    43    None        S                 15  0.0000000
    44    None        S                 16  0.0000000
    45    None        S                 17  0.0000000
    46    None        S                 18  0.0000000
    47    None        S                 19  0.0000000
    48    None        S                 20  0.0000000
    49    None        S               <NA>  2.0000000
    50    None        S                sum 52.0000000
    51    None        S             length 10.0000000
    52    None        S               mean  6.5000000
    53    None        S standard.deviation  1.8516402
    54    None        S                var  3.4285714
    55    None        S             median  7.0000000
    56    None        S                min  3.0000000
    57    None        S                max  9.0000000
    58    None        S        quantile.0%  3.0000000
    59    None        S       quantile.25%  5.7500000
    60    None        S       quantile.50%  7.0000000
    61    None        S       quantile.75%  7.2500000
    62    None        S      quantile.100%  9.0000000
    63    None        S               skew -0.4252986
    64    None        S           kurtosis -1.3028646
    65    None        W                  0  0.0000000
    66    None        W                  1  0.0000000
    67    None        W                  2  1.0000000
    68    None        W                  3  0.0000000
    69    None        W                  4  2.0000000
    70    None        W                  5  2.0000000
    71    None        W                  6  2.0000000
    72    None        W                  7  2.0000000
    73    None        W                  8  1.0000000
    74    None        W                  9  0.0000000
    75    None        W                 10  0.0000000
    76    None        W                 11  0.0000000
    77    None        W                 12  0.0000000
    78    None        W                 13  0.0000000
    79    None        W                 14  0.0000000
    80    None        W                 15  0.0000000
    81    None        W                 16  0.0000000
    82    None        W                 17  0.0000000
    83    None        W                 18  0.0000000
    84    None        W                 19  0.0000000
    85    None        W                 20  0.0000000
    86    None        W                 21  0.0000000
    87    None        W                 22  0.0000000
    88    None        W                 23  0.0000000
    89    None        W                 24  0.0000000
    90    None        W                 25  0.0000000
    91    None        W                 26  0.0000000
    92    None        W                 27  0.0000000
    93    None        W                 28  0.0000000
    94    None        W                 29  0.0000000
    95    None        W                 30  0.0000000
    96    None        W               <NA>  0.0000000
    97    None        W                sum 54.0000000
    98    None        W             length 10.0000000
    99    None        W               mean  5.4000000
    100   None        W standard.deviation  1.7763883
    101   None        W                var  3.1555556
    102   None        W             median  5.5000000
    103   None        W                min  2.0000000
    104   None        W                max  8.0000000
    105   None        W        quantile.0%  2.0000000
    106   None        W       quantile.25%  4.2500000
    107   None        W       quantile.50%  5.5000000
    108   None        W       quantile.75%  6.7500000
    109   None        W      quantile.100%  8.0000000
    110   None        W               skew -0.3339582
    111   None        W           kurtosis -0.9871315

这就是我要找的。

Answer 3

我最终使用另一个函数来对summary函数进行多次调用。

来自Extracting a vector from a list for a R function的新代码

    # create the summary function
    summaryStatistics <- function(x,levels) {
        xx <- na.omit(x)
        c(table(factor(x, levels=levels), useNA='always', exclude=NULL),
          sum=sum(xx), 
          length=length(x), 
          mean=mean(xx), 
          standard.deviation=sqrt(var(xx)), 
          var=(var(xx)), 
          median=median(xx), 
          min=min(xx), 
          max=max(xx),
          quantile=quantile(xx),
          skew=sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x) ,
          kurtosis=sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3
        )
    }

    # create the function that steps through the summary function 
    extractSummaryDataframe <- function( questions.dataframe, answers.list, filter) {

        result <- data.frame(
            answer=factor(),
            question=factor(),
            value=double()
        ) ;
        listIndex <- 0 ;
        for ( name in names(questions.dataframe)){
            listIndex <- listIndex + 1 ;

            result <- rbind( result, 
                setNames(
                        nm=c('answer','question','value'),
                        as.data.frame(
                            as.table(
                                simplify2array(
                                    lapply(
                                        questions.dataframe[c(name)], 
                                        summaryStatistics,
                                        answers.list[[listIndex]]
                                    )
                                )
                            )
                        )
                )
            )           
        }

        result <- result[, c(2, 1, 3)] ;
        result <- cbind(filter=filter,result) ;
        result
    }

    # create the test data frame
    Id <- c(1,2,3,4,5,6,7,8,9,10)
    ClassA <- c(1,NA,3,1,1,2,1,4,5,3)
    ClassB <- c(2,1,1,3,3,2,1,1,3,3)
    R <- c(1,2,3,NA,9,2,4,5,6,7)
    S <- c(3,7,NA,9,5,8,7,NA,7,6)
    W <- c(4,5,6,7,2,4,5,6,7,8)
    df <- data.frame(Id,ClassA,ClassB,R,S,W)

    ClassAAnswers <- c(1:5,NA)
    ClassBAnswers <- c(1:5,NA)

    RAnswers <- c(0:10,NA);
    SAnswers <- c(0:20,NA);
    WAnswers <- c(0:30,NA);
    answers.list <- list(RAnswers,SAnswers,WAnswers);

    RSW.df <- df[c('R','S','W')];

    # create the result
    result <- extractSummaryDataframe( RSW.df, answers.list, 'None') ;

    # return the result
    result

返回

        filter question             answer      value
    1     None        R                  0  0.0000000
    2     None        R                  1  1.0000000
    3     None        R                  2  2.0000000
    4     None        R                  3  1.0000000
    5     None        R                  4  1.0000000
    6     None        R                  5  1.0000000
    7     None        R                  6  1.0000000
    8     None        R                  7  1.0000000
    9     None        R                  8  0.0000000
    10    None        R                  9  1.0000000
    11    None        R                 10  0.0000000
    12    None        R               <NA>  1.0000000
    13    None        R                sum 39.0000000
    14    None        R             length 10.0000000
    15    None        R               mean  4.3333333
    16    None        R standard.deviation  2.6457513
    17    None        R                var  7.0000000
    18    None        R             median  4.0000000
    19    None        R                min  1.0000000
    20    None        R                max  9.0000000
    21    None        R        quantile.0%  1.0000000
    22    None        R       quantile.25%  2.0000000
    23    None        R       quantile.50%  4.0000000
    24    None        R       quantile.75%  6.0000000
    25    None        R      quantile.100%  9.0000000
    26    None        R               skew  0.3275692
    27    None        R           kurtosis -1.5333333
    28    None        S                  0  0.0000000
    29    None        S                  1  0.0000000
    30    None        S                  2  0.0000000
    31    None        S                  3  1.0000000
    32    None        S                  4  0.0000000
    33    None        S                  5  1.0000000
    34    None        S                  6  1.0000000
    35    None        S                  7  3.0000000
    36    None        S                  8  1.0000000
    37    None        S                  9  1.0000000
    38    None        S                 10  0.0000000
    39    None        S                 11  0.0000000
    40    None        S                 12  0.0000000
    41    None        S                 13  0.0000000
    42    None        S                 14  0.0000000
    43    None        S                 15  0.0000000
    44    None        S                 16  0.0000000
    45    None        S                 17  0.0000000
    46    None        S                 18  0.0000000
    47    None        S                 19  0.0000000
    48    None        S                 20  0.0000000
    49    None        S               <NA>  2.0000000
    50    None        S                sum 52.0000000
    51    None        S             length 10.0000000
    52    None        S               mean  6.5000000
    53    None        S standard.deviation  1.8516402
    54    None        S                var  3.4285714
    55    None        S             median  7.0000000
    56    None        S                min  3.0000000
    57    None        S                max  9.0000000
    58    None        S        quantile.0%  3.0000000
    59    None        S       quantile.25%  5.7500000
    60    None        S       quantile.50%  7.0000000
    61    None        S       quantile.75%  7.2500000
    62    None        S      quantile.100%  9.0000000
    63    None        S               skew -0.4252986
    64    None        S           kurtosis -1.3028646
    65    None        W                  0  0.0000000
    66    None        W                  1  0.0000000
    67    None        W                  2  1.0000000
    68    None        W                  3  0.0000000
    69    None        W                  4  2.0000000
    70    None        W                  5  2.0000000
    71    None        W                  6  2.0000000
    72    None        W                  7  2.0000000
    73    None        W                  8  1.0000000
    74    None        W                  9  0.0000000
    75    None        W                 10  0.0000000
    76    None        W                 11  0.0000000
    77    None        W                 12  0.0000000
    78    None        W                 13  0.0000000
    79    None        W                 14  0.0000000
    80    None        W                 15  0.0000000
    81    None        W                 16  0.0000000
    82    None        W                 17  0.0000000
    83    None        W                 18  0.0000000
    84    None        W                 19  0.0000000
    85    None        W                 20  0.0000000
    86    None        W                 21  0.0000000
    87    None        W                 22  0.0000000
    88    None        W                 23  0.0000000
    89    None        W                 24  0.0000000
    90    None        W                 25  0.0000000
    91    None        W                 26  0.0000000
    92    None        W                 27  0.0000000
    93    None        W                 28  0.0000000
    94    None        W                 29  0.0000000
    95    None        W                 30  0.0000000
    96    None        W               <NA>  0.0000000
    97    None        W                sum 54.0000000
    98    None        W             length 10.0000000
    99    None        W               mean  5.4000000
    100   None        W standard.deviation  1.7763883
    101   None        W                var  3.1555556
    102   None        W             median  5.5000000
    103   None        W                min  2.0000000
    104   None        W                max  8.0000000
    105   None        W        quantile.0%  2.0000000
    106   None        W       quantile.25%  4.2500000
    107   None        W       quantile.50%  5.5000000
    108   None        W       quantile.75%  6.7500000
    109   None        W      quantile.100%  8.0000000
    110   None        W               skew -0.3339582
    111   None        W           kurtosis -0.9871315

这正是我所寻找的： - ）。

使用R中的向量来更改函数的输出

3 个答案:

创建摘要功能