从列表中提取向量以传给R函数

时间:2016-04-30 06:20:05

标签: r

我正在为报告创建摘要数据框。我已经能够手动创建我想要的数据框了。我正在创建一个函数来简化结果的创建。

手动流程

# create the summary function
    summaryStatistics <- function(x, levels) {
        # Summarise one answer vector: a frequency count for every allowed
        # level (plus an NA bucket), followed by descriptive statistics of the
        # non-missing values.
        #
        # x      - vector of observed answers; may contain NA.
        # levels - values to tabulate; handed to factor() as its levels.
        #
        # Returns a named numeric vector: the per-level counts, the NA count,
        # then sum, length, mean, standard.deviation, var, median, min, max,
        # quantile.0% ... quantile.100%, skew and kurtosis.
        #
        # NOTE(review): `length`, `skew` and `kurtosis` use length(x), which
        # still counts the NA entries, while the other statistics use only the
        # non-missing values -- confirm that this denominator is intended.
        observed <- na.omit(x)
        n.total <- length(x)

        counts <- table(factor(x, levels = levels), useNA = 'always', exclude = NULL)
        total <- sum(observed)

        centre <- mean(observed)
        variance <- var(observed)
        spread <- sqrt(variance)
        deviations <- observed - centre

        c(
            counts,
            sum = total,
            length = n.total,
            mean = centre,
            standard.deviation = spread,
            var = variance,
            median = median(observed),
            min = min(observed),
            max = max(observed),
            quantile = quantile(observed),
            skew = sum(deviations^3 / spread^3) / n.total,
            kurtosis = sum(deviations^4 / spread^4) / n.total - 3
        )
    }

    # Test data: an Id column, two class columns and three question columns
    # (R, S, W); some answers are NA.
    Id <- as.numeric(1:10)
    ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
    ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
    R <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
    S <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
    W <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)

    df <- data.frame(Id = Id, ClassA = ClassA, ClassB = ClassB, R = R, S = S, W = W)

    # Possible answers for each column, each ending with NA
    ClassAAnswers <- c(seq_len(5), NA)
    ClassBAnswers <- c(seq_len(5), NA)

    RAnswers <- c(0:10, NA)
    SAnswers <- c(0:20, NA)
    WAnswers <- c(0:30, NA)

    # Same order as the columns of RSW.df: R, S, W
    answers.list <- list(RAnswers, SAnswers, WAnswers)

    # Only the three question columns
    RSW.df <- df[, c('R', 'S', 'W')]

    # Manual process: summarise each question on its own, stack the three
    # long-format pieces, then tidy up the columns.

    # R, tabulated against RAnswers
    result <- as.data.frame(
        as.table(simplify2array(lapply(df['R'], summaryStatistics, RAnswers)))
    )
    names(result) <- c('answer', 'question', 'value')

    # S, tabulated against SAnswers
    result <- rbind(
        result,
        setNames(
            as.data.frame(
                as.table(simplify2array(lapply(df['S'], summaryStatistics, SAnswers)))
            ),
            c('answer', 'question', 'value')
        )
    )

    # W, tabulated against WAnswers
    result <- rbind(
        result,
        setNames(
            as.data.frame(
                as.table(simplify2array(lapply(df['W'], summaryStatistics, WAnswers)))
            ),
            c('answer', 'question', 'value')
        )
    )

    # Put the columns in question, answer, value order
    result <- result[c(2, 1, 3)]

    # Prepend the constant filter column
    result <- cbind(filter = 'None', result)

    # Print the finished summary
    result

产生结果

       filter question             answer      value
    1     None        R                  0  0.0000000
    2     None        R                  1  1.0000000
    3     None        R                  2  2.0000000
    4     None        R                  3  1.0000000
    5     None        R                  4  1.0000000
    6     None        R                  5  1.0000000
    7     None        R                  6  1.0000000
    8     None        R                  7  1.0000000
    9     None        R                  8  0.0000000
    10    None        R                  9  1.0000000
    11    None        R                 10  0.0000000
    12    None        R               <NA>  1.0000000
    13    None        R                sum 39.0000000
    14    None        R             length 10.0000000
    15    None        R               mean  4.3333333
    16    None        R standard.deviation  2.6457513
    17    None        R                var  7.0000000
    18    None        R             median  4.0000000
    19    None        R                min  1.0000000
    20    None        R                max  9.0000000
    21    None        R        quantile.0%  1.0000000
    22    None        R       quantile.25%  2.0000000
    23    None        R       quantile.50%  4.0000000
    24    None        R       quantile.75%  6.0000000
    25    None        R      quantile.100%  9.0000000
    26    None        R               skew  0.3275692
    27    None        R           kurtosis -1.5333333
    28    None        S                  0  0.0000000
    29    None        S                  1  0.0000000
    30    None        S                  2  0.0000000
    31    None        S                  3  1.0000000
    32    None        S                  4  0.0000000
    33    None        S                  5  1.0000000
    34    None        S                  6  1.0000000
    35    None        S                  7  3.0000000
    36    None        S                  8  1.0000000
    37    None        S                  9  1.0000000
    38    None        S                 10  0.0000000
    39    None        S                 11  0.0000000
    40    None        S                 12  0.0000000
    41    None        S                 13  0.0000000
    42    None        S                 14  0.0000000
    43    None        S                 15  0.0000000
    44    None        S                 16  0.0000000
    45    None        S                 17  0.0000000
    46    None        S                 18  0.0000000
    47    None        S                 19  0.0000000
    48    None        S                 20  0.0000000
    49    None        S               <NA>  2.0000000
    50    None        S                sum 52.0000000
    51    None        S             length 10.0000000
    52    None        S               mean  6.5000000
    53    None        S standard.deviation  1.8516402
    54    None        S                var  3.4285714
    55    None        S             median  7.0000000
    56    None        S                min  3.0000000
    57    None        S                max  9.0000000
    58    None        S        quantile.0%  3.0000000
    59    None        S       quantile.25%  5.7500000
    60    None        S       quantile.50%  7.0000000
    61    None        S       quantile.75%  7.2500000
    62    None        S      quantile.100%  9.0000000
    63    None        S               skew -0.4252986
    64    None        S           kurtosis -1.3028646
    65    None        W                  0  0.0000000
    66    None        W                  1  0.0000000
    67    None        W                  2  1.0000000
    68    None        W                  3  0.0000000
    69    None        W                  4  2.0000000
    70    None        W                  5  2.0000000
    71    None        W                  6  2.0000000
    72    None        W                  7  2.0000000
    73    None        W                  8  1.0000000
    74    None        W                  9  0.0000000
    75    None        W                 10  0.0000000
    76    None        W                 11  0.0000000
    77    None        W                 12  0.0000000
    78    None        W                 13  0.0000000
    79    None        W                 14  0.0000000
    80    None        W                 15  0.0000000
    81    None        W                 16  0.0000000
    82    None        W                 17  0.0000000
    83    None        W                 18  0.0000000
    84    None        W                 19  0.0000000
    85    None        W                 20  0.0000000
    86    None        W                 21  0.0000000
    87    None        W                 22  0.0000000
    88    None        W                 23  0.0000000
    89    None        W                 24  0.0000000
    90    None        W                 25  0.0000000
    91    None        W                 26  0.0000000
    92    None        W                 27  0.0000000
    93    None        W                 28  0.0000000
    94    None        W                 29  0.0000000
    95    None        W                 30  0.0000000
    96    None        W               <NA>  0.0000000
    97    None        W                sum 54.0000000
    98    None        W             length 10.0000000
    99    None        W               mean  5.4000000
    100   None        W standard.deviation  1.7763883
    101   None        W                var  3.1555556
    102   None        W             median  5.5000000
    103   None        W                min  2.0000000
    104   None        W                max  8.0000000
    105   None        W        quantile.0%  2.0000000
    106   None        W       quantile.25%  4.2500000
    107   None        W       quantile.50%  5.5000000
    108   None        W       quantile.75%  6.7500000
    109   None        W      quantile.100%  8.0000000
    110   None        W               skew -0.3339582
    111   None        W           kurtosis -0.9871315

这就是我正在寻找的。

我创建了一个函数来逐列遍历数据框及其可能的答案。如果我把答案向量硬编码,得到的结果与上面的一致。

extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
        # Build a long-format summary (answer, question, value) for every column
        # of questions.dataframe and prepend a constant filter column.
        #
        # questions.dataframe - data frame whose columns are summarised in turn.
        # answers.list        - not used by this version; every column is
        #                       tabulated against the fixed levels c(0:10, NA).
        # filter              - value stored in the leading `filter` column.
        columnLabels <- c('answer', 'question', 'value')
        fixedLevels <- c(0:10, NA)

        # Long-format summary of a single column
        summariseQuestion <- function(name) {
            wide <- simplify2array(
                lapply(questions.dataframe[name], summaryStatistics, fixedLevels)
            )
            long <- as.data.frame(as.table(wide))
            names(long) <- columnLabels
            long
        }

        # Stack the per-column pieces onto an empty seed, in column order
        seed <- data.frame(answer = factor(), question = factor(), value = double())
        stacked <- Reduce(
            function(soFar, name) rbind(soFar, summariseQuestion(name)),
            names(questions.dataframe),
            seed
        )

        cbind(filter = filter, stacked)
    }

    # Summarise the R, S and W columns; every output row gets filter 'None'
    extractSummaryDataframe( RSW.df, answers.list, 'None')

返回

       filter             answer question      value
    1    None                  0        R  0.0000000
    2    None                  1        R  1.0000000
    3    None                  2        R  2.0000000
    4    None                  3        R  1.0000000
    5    None                  4        R  1.0000000
    6    None                  5        R  1.0000000
    7    None                  6        R  1.0000000
    8    None                  7        R  1.0000000
    9    None                  8        R  0.0000000
    10   None                  9        R  1.0000000
    11   None                 10        R  0.0000000
    12   None               <NA>        R  1.0000000
    13   None                sum        R 39.0000000
    14   None             length        R 10.0000000
    15   None               mean        R  4.3333333
    16   None standard.deviation        R  2.6457513
    17   None                var        R  7.0000000
    18   None             median        R  4.0000000
    19   None                min        R  1.0000000
    20   None                max        R  9.0000000
    21   None        quantile.0%        R  1.0000000
    22   None       quantile.25%        R  2.0000000
    23   None       quantile.50%        R  4.0000000
    24   None       quantile.75%        R  6.0000000
    25   None      quantile.100%        R  9.0000000
    26   None               skew        R  0.3275692
    27   None           kurtosis        R -1.5333333
    28   None                  0        S  0.0000000
    29   None                  1        S  0.0000000
    30   None                  2        S  0.0000000
    31   None                  3        S  1.0000000
    32   None                  4        S  0.0000000
    33   None                  5        S  1.0000000
    34   None                  6        S  1.0000000
    35   None                  7        S  3.0000000
    36   None                  8        S  1.0000000
    37   None                  9        S  1.0000000
    38   None                 10        S  0.0000000
    39   None               <NA>        S  2.0000000
    40   None                sum        S 52.0000000
    41   None             length        S 10.0000000
    42   None               mean        S  6.5000000
    43   None standard.deviation        S  1.8516402
    44   None                var        S  3.4285714
    45   None             median        S  7.0000000
    46   None                min        S  3.0000000
    47   None                max        S  9.0000000
    48   None        quantile.0%        S  3.0000000
    49   None       quantile.25%        S  5.7500000
    50   None       quantile.50%        S  7.0000000
    51   None       quantile.75%        S  7.2500000
    52   None      quantile.100%        S  9.0000000
    53   None               skew        S -0.4252986
    54   None           kurtosis        S -1.3028646
    55   None                  0        W  0.0000000
    56   None                  1        W  0.0000000
    57   None                  2        W  1.0000000
    58   None                  3        W  0.0000000
    59   None                  4        W  2.0000000
    60   None                  5        W  2.0000000
    61   None                  6        W  2.0000000
    62   None                  7        W  2.0000000
    63   None                  8        W  1.0000000
    64   None                  9        W  0.0000000
    65   None                 10        W  0.0000000
    66   None               <NA>        W  0.0000000
    67   None                sum        W 54.0000000
    68   None             length        W 10.0000000
    69   None               mean        W  5.4000000
    70   None standard.deviation        W  1.7763883
    71   None                var        W  3.1555556
    72   None             median        W  5.5000000
    73   None                min        W  2.0000000
    74   None                max        W  8.0000000
    75   None        quantile.0%        W  2.0000000
    76   None       quantile.25%        W  4.2500000
    77   None       quantile.50%        W  5.5000000
    78   None       quantile.75%        W  6.7500000
    79   None      quantile.100%        W  8.0000000
    80   None               skew        W -0.3339582
    81   None           kurtosis        W -0.9871315

但是,如果我在遍历各列名称时改用对应的列表项,像这样:

extractSummaryDataframe <- function( questions.dataframe, answers.list, filter) {
        # Build a long-format summary (answer, question, value) for every column
        # of questions.dataframe, taking the levels for each column from the
        # matching position of answers.list, and prepend a constant filter
        # column.
        #
        # questions.dataframe - data frame whose columns are summarised in turn.
        # answers.list        - list of level vectors, one per column, in
        #                       column order.
        # filter              - value stored in the leading `filter` column.
        #
        # NOTE: answers.list[listIndex] below uses single brackets, so
        # summaryStatistics receives a one-element list that wraps the levels
        # vector rather than the vector itself. That is the behaviour this
        # example demonstrates, so it is kept as written;
        # answers.list[[listIndex]] would pass the vector.

        result <- data.frame(
            answer=factor(),
            question=factor(),
            value=double()
        ) ;
        listIndex <- 0 ;
        for ( name in names(questions.dataframe)){
            listIndex <- listIndex + 1 ;
            result <- rbind( result, 
                setNames(
                        nm=c('answer','question','value'),
                        as.data.frame(
                            as.table(
                                simplify2array(
                                    lapply(
                                        questions.dataframe[c(name)], 
                                        summaryStatistics,
                                        answers.list[listIndex]
                                    )
                                )
                            )
                        )
                )
            )           
        }

        result <- cbind(filter=filter,result) ;
        result
    }

    # Summarise the R, S and W columns with answers.list supplying the levels;
    # every output row gets filter 'None'
    extractSummaryDataframe( RSW.df, answers.list, 'None')

               filter
    1    None
    2    None
    3    None
    4    None
    5    None
    6    None
    7    None
    8    None
    9    None
    10   None
    11   None
    12   None
    13   None
    14   None
    15   None
    16   None
    17   None
    18   None
    19   None
    20   None
    21   None
    22   None
    23   None
    24   None
    25   None
    26   None
    27   None
    28   None
    29   None
    30   None
    31   None
    32   None
    33   None
    34   None
    35   None
    36   None
    37   None
    38   None
    39   None
    40   None
    41   None
    42   None
    43   None
    44   None
    45   None
    46   None
    47   None
    48   None
    49   None
    50   None
    51   None
                                                                                                                        answer
    1                                                                                  c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NA)
    2                                                                                                                     <NA>
    3                                                                                                                      sum
    4                                                                                                                   length
    5                                                                                                                     mean
    6                                                                                                       standard.deviation
    7                                                                                                                      var
    8                                                                                                                   median
    9                                                                                                                      min
    10                                                                                                                     max
    11                                                                                                             quantile.0%
    12                                                                                                            quantile.25%
    13                                                                                                            quantile.50%
    14                                                                                                            quantile.75%
    15                                                                                                           quantile.100%
    16                                                                                                                    skew
    17                                                                                                                kurtosis
    18                                         c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, NA)
    19                                                                                                                    <NA>
    20                                                                                                                     sum
    21                                                                                                                  length
    22                                                                                                                    mean
    23                                                                                                      standard.deviation
    24                                                                                                                     var
    25                                                                                                                  median
    26                                                                                                                     min
    27                                                                                                                     max
    28                                                                                                             quantile.0%
    29                                                                                                            quantile.25%
    30                                                                                                            quantile.50%
    31                                                                                                            quantile.75%
    32                                                                                                           quantile.100%
    33                                                                                                                    skew
    34                                                                                                                kurtosis
    35 c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, NA)
    36                                                                                                                    <NA>
    37                                                                                                                     sum
    38                                                                                                                  length
    39                                                                                                                    mean
    40                                                                                                      standard.deviation
    41                                                                                                                     var
    42                                                                                                                  median
    43                                                                                                                     min
    44                                                                                                                     max
    45                                                                                                             quantile.0%
    46                                                                                                            quantile.25%
    47                                                                                                            quantile.50%
    48                                                                                                            quantile.75%
    49                                                                                                           quantile.100%
    50                                                                                                                    skew
    51                                                                                                                kurtosis
       question      value
    1         R  0.0000000
    2         R 10.0000000
    3         R 39.0000000
    4         R 10.0000000
    5         R  4.3333333
    6         R  2.6457513
    7         R  7.0000000
    8         R  4.0000000
    9         R  1.0000000
    10        R  9.0000000
    11        R  1.0000000
    12        R  2.0000000
    13        R  4.0000000
    14        R  6.0000000
    15        R  9.0000000
    16        R  0.3275692
    17        R -1.5333333
    18        S  0.0000000
    19        S 10.0000000
    20        S 52.0000000
    21        S 10.0000000
    22        S  6.5000000
    23        S  1.8516402
    24        S  3.4285714
    25        S  7.0000000
    26        S  3.0000000
    27        S  9.0000000
    28        S  3.0000000
    29        S  5.7500000
    30        S  7.0000000
    31        S  7.2500000
    32        S  9.0000000
    33        S -0.4252986
    34        S -1.3028646
    35        W  0.0000000
    36        W 10.0000000
    37        W 54.0000000
    38        W 10.0000000
    39        W  5.4000000
    40        W  1.7763883
    41        W  3.1555556
    42        W  5.5000000
    43        W  2.0000000
    44        W  8.0000000
    45        W  2.0000000
    46        W  4.2500000
    47        W  5.5000000
    48        W  6.7500000
    49        W  8.0000000
    50        W -0.3339582
    51        W -0.9871315

这与我正在寻找的结果完全不同。

要让 answers.list[1] 被当作 c(0:10,NA) 来使用,应该用什么语法?

1 个答案:

答案 0 :(得分:0)

我遇到的语法错误在于:answers.list[listIndex] 应该写成 answers.list[[listIndex]]。单方括号 [ 返回的仍然是只含一个元素的列表,双方括号 [[ 才会取出列表中的向量本身。

我是借助 class() 函数发现这一点的。

class(answers.list) returned list: expected.

class(answers.list[1]) returned list: unexpected.

class(answers.list[[1]]) returned integer: which is what I was looking for.

新代码

    # create the summary function
    summaryStatistics <- function(x, levels) {
        # Summarise one answer vector: a frequency count for every allowed
        # level (plus an NA bucket), followed by descriptive statistics of the
        # non-missing values.
        #
        # x      - vector of observed answers; may contain NA.
        # levels - values to tabulate; handed to factor() as its levels.
        #
        # Returns a named numeric vector: the per-level counts, the NA count,
        # then sum, length, mean, standard.deviation, var, median, min, max,
        # quantile.0% ... quantile.100%, skew and kurtosis.
        #
        # NOTE(review): `length`, `skew` and `kurtosis` use length(x), which
        # still counts the NA entries, while the other statistics use only the
        # non-missing values -- confirm that this denominator is intended.
        observed <- na.omit(x)
        n.total <- length(x)

        counts <- table(factor(x, levels = levels), useNA = 'always', exclude = NULL)
        total <- sum(observed)

        centre <- mean(observed)
        variance <- var(observed)
        spread <- sqrt(variance)
        deviations <- observed - centre

        c(
            counts,
            sum = total,
            length = n.total,
            mean = centre,
            standard.deviation = spread,
            var = variance,
            median = median(observed),
            min = min(observed),
            max = max(observed),
            quantile = quantile(observed),
            skew = sum(deviations^3 / spread^3) / n.total,
            kurtosis = sum(deviations^4 / spread^4) / n.total - 3
        )
    }

    # create the function that steps through the summary function 
    extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
        # Build a long-format summary (filter, question, answer, value) for
        # every column of questions.dataframe.
        #
        # questions.dataframe - data frame whose columns are summarised in turn.
        # answers.list        - list of level vectors matched to the columns by
        #                       position; entries beyond the number of columns
        #                       are ignored.
        # filter              - value stored in the leading `filter` column.
        #
        # Stops with an explicit message when answers.list has fewer entries
        # than there are columns to summarise.
        questionNames <- names(questions.dataframe)
        if (length(answers.list) < length(questionNames)) {
            stop(
                "answers.list must supply one vector of levels per question: got ",
                length(answers.list), " for ", length(questionNames), " question(s)",
                call. = FALSE
            )
        }

        columnLabels <- c('answer', 'question', 'value')

        # One long-format piece per column. `[[` extracts the levels vector
        # itself; `[` would hand over a one-element list instead.
        pieces <- lapply(seq_along(questionNames), function(i) {
            wide <- simplify2array(
                lapply(
                    questions.dataframe[questionNames[[i]]],
                    summaryStatistics,
                    answers.list[[i]]
                )
            )
            setNames(as.data.frame(as.table(wide)), columnLabels)
        })

        # Bind everything in a single call instead of growing the data frame
        # inside a loop; the empty seed keeps the column layout when there are
        # no columns to summarise.
        seed <- data.frame(answer = factor(), question = factor(), value = double())
        result <- do.call(rbind, c(list(seed), pieces))

        # Reorder to question, answer, value, then prepend the filter column
        result <- result[, c(2, 1, 3)]
        cbind(filter = filter, result)
    }

    # Test data: an Id column, two class columns and three question columns
    # (R, S, W); some answers are NA.
    Id <- as.numeric(1:10)
    ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
    ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
    R <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
    S <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
    W <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)
    df <- data.frame(Id = Id, ClassA = ClassA, ClassB = ClassB, R = R, S = S, W = W)

    # Possible answers for each column, each ending with NA
    ClassAAnswers <- c(seq_len(5), NA)
    ClassBAnswers <- c(seq_len(5), NA)

    RAnswers <- c(0:10, NA)
    SAnswers <- c(0:20, NA)
    WAnswers <- c(0:30, NA)

    # Same order as the columns of RSW.df: R, S, W
    answers.list <- list(RAnswers, SAnswers, WAnswers)

    # Only the three question columns
    RSW.df <- df[, c('R', 'S', 'W')]

    # Summarise the R, S and W columns, pairing each column with the levels
    # vector at the same position in answers.list; rows are labelled with
    # filter 'None'
    result <- extractSummaryDataframe( RSW.df, answers.list, 'None') ;

    # Print the finished summary
    result 

返回

        filter question             answer      value
    1     None        R                  0  0.0000000
    2     None        R                  1  1.0000000
    3     None        R                  2  2.0000000
    4     None        R                  3  1.0000000
    5     None        R                  4  1.0000000
    6     None        R                  5  1.0000000
    7     None        R                  6  1.0000000
    8     None        R                  7  1.0000000
    9     None        R                  8  0.0000000
    10    None        R                  9  1.0000000
    11    None        R                 10  0.0000000
    12    None        R               <NA>  1.0000000
    13    None        R                sum 39.0000000
    14    None        R             length 10.0000000
    15    None        R               mean  4.3333333
    16    None        R standard.deviation  2.6457513
    17    None        R                var  7.0000000
    18    None        R             median  4.0000000
    19    None        R                min  1.0000000
    20    None        R                max  9.0000000
    21    None        R        quantile.0%  1.0000000
    22    None        R       quantile.25%  2.0000000
    23    None        R       quantile.50%  4.0000000
    24    None        R       quantile.75%  6.0000000
    25    None        R      quantile.100%  9.0000000
    26    None        R               skew  0.3275692
    27    None        R           kurtosis -1.5333333
    28    None        S                  0  0.0000000
    29    None        S                  1  0.0000000
    30    None        S                  2  0.0000000
    31    None        S                  3  1.0000000
    32    None        S                  4  0.0000000
    33    None        S                  5  1.0000000
    34    None        S                  6  1.0000000
    35    None        S                  7  3.0000000
    36    None        S                  8  1.0000000
    37    None        S                  9  1.0000000
    38    None        S                 10  0.0000000
    39    None        S                 11  0.0000000
    40    None        S                 12  0.0000000
    41    None        S                 13  0.0000000
    42    None        S                 14  0.0000000
    43    None        S                 15  0.0000000
    44    None        S                 16  0.0000000
    45    None        S                 17  0.0000000
    46    None        S                 18  0.0000000
    47    None        S                 19  0.0000000
    48    None        S                 20  0.0000000
    49    None        S               <NA>  2.0000000
    50    None        S                sum 52.0000000
    51    None        S             length 10.0000000
    52    None        S               mean  6.5000000
    53    None        S standard.deviation  1.8516402
    54    None        S                var  3.4285714
    55    None        S             median  7.0000000
    56    None        S                min  3.0000000
    57    None        S                max  9.0000000
    58    None        S        quantile.0%  3.0000000
    59    None        S       quantile.25%  5.7500000
    60    None        S       quantile.50%  7.0000000
    61    None        S       quantile.75%  7.2500000
    62    None        S      quantile.100%  9.0000000
    63    None        S               skew -0.4252986
    64    None        S           kurtosis -1.3028646
    65    None        W                  0  0.0000000
    66    None        W                  1  0.0000000
    67    None        W                  2  1.0000000
    68    None        W                  3  0.0000000
    69    None        W                  4  2.0000000
    70    None        W                  5  2.0000000
    71    None        W                  6  2.0000000
    72    None        W                  7  2.0000000
    73    None        W                  8  1.0000000
    74    None        W                  9  0.0000000
    75    None        W                 10  0.0000000
    76    None        W                 11  0.0000000
    77    None        W                 12  0.0000000
    78    None        W                 13  0.0000000
    79    None        W                 14  0.0000000
    80    None        W                 15  0.0000000
    81    None        W                 16  0.0000000
    82    None        W                 17  0.0000000
    83    None        W                 18  0.0000000
    84    None        W                 19  0.0000000
    85    None        W                 20  0.0000000
    86    None        W                 21  0.0000000
    87    None        W                 22  0.0000000
    88    None        W                 23  0.0000000
    89    None        W                 24  0.0000000
    90    None        W                 25  0.0000000
    91    None        W                 26  0.0000000
    92    None        W                 27  0.0000000
    93    None        W                 28  0.0000000
    94    None        W                 29  0.0000000
    95    None        W                 30  0.0000000
    96    None        W               <NA>  0.0000000
    97    None        W                sum 54.0000000
    98    None        W             length 10.0000000
    99    None        W               mean  5.4000000
    100   None        W standard.deviation  1.7763883
    101   None        W                var  3.1555556
    102   None        W             median  5.5000000
    103   None        W                min  2.0000000
    104   None        W                max  8.0000000
    105   None        W        quantile.0%  2.0000000
    106   None        W       quantile.25%  4.2500000
    107   None        W       quantile.50%  5.5000000
    108   None        W       quantile.75%  6.7500000
    109   None        W      quantile.100%  8.0000000
    110   None        W               skew -0.3339582
    111   None        W           kurtosis -0.9871315

这正是我想要的结果 :-)。