
时间:2017-01-08 04:27:40

标签: r statistics usage-statistics


   X  A1  A2  A3  M1  M2  M3  U1  U2  U3
1      A   A   A   M   M   M   U   U   U
2 X1 100 200 250 200 230 400 400 100 200
3 X2 600 300 400 300 550 750 800 900 540
4 X3 500 300 200 200 200 100 500 400 600

数据的列是样本,行是变量。第一行是样本名称,第二行是分组(A、M、U)。我想得到每个组的描述性统计量,例如组 A(A1、A2、A3)的均值、标准差等。谁能告诉我该怎么做?我看到的关于描述性统计的回答大多是按列计算的。如果问题不够清楚,请告诉我。谢谢你的帮助。


1 个答案:

答案 0 :(得分:2)




# Load useful 'tidy data' packages
library(dplyr)   # pipe (%>%) and group_by() used further down
library(tibble)  # tibble()

# Make 'mydata'
# Every column mixes a group label ("A", "M", "U") with numbers, so c()
# coerces each column to character; the values are converted to numeric
# later, once the group labels have been moved into their own column.
mydata <- tibble(X = c("", "X1", "X2", "X3"),
                 A1 = c("A", 100, 600, 500),
                 A2 = c("A", 200, 300, 300),
                 A3 = c("A", 250, 400, 200),
                 M1 = c("M", 200, 300, 200),
                 M2 = c("M", 230, 550, 200),
                 M3 = c("M", 400, 750, 100),
                 U1 = c("U", 400, 800, 500),
                 U2 = c("U", 100, 900, 400),
                 U3 = c("U", 200, 540, 600))

# View 'mydata'
mydata

#> # A tibble: 4 x 10
#>   X     A1    A2    A3    M1    M2    M3    U1    U2    U3   
#>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 ""    A     A     A     M     M     M     U     U     U    
#> 2 X1    100   200   250   200   230   400   400   100   200  
#> 3 X2    600   300   400   300   550   750   800   900   540  
#> 4 X3    500   300   200   200   200   100   500   400   600


# Transpose rows and columns and convert resulting matrix back into a dataframe
# t() yields a character matrix whose columns are unnamed here, so the columns
# are named V1, V2, ... explicitly via .name_repair; the matrix row names
# (X, A1, ...) are not carried over into the tibble.
mydata_new <- as_tibble(
    t(mydata),
    .name_repair = function(nms) paste0("V", seq_along(nms))
)

# View 'mydata_new'
mydata_new

#> # A tibble: 10 x 4
#>    V1    V2    V3    V4   
#>    <chr> <chr> <chr> <chr>
#>  1 ""    X1    X2    X3   
#>  2 A     100   600   500  
#>  3 A     200   300   300  
#>  4 A     250   400   200  
#>  5 M     200   300   200  
#>  6 M     230   550   200  
#>  7 M     400   750   100  
#>  8 U     400   800   500  
#>  9 U     100   900   400  
#> 10 U     200   540   600

# Clean 'mydata_new'
## Add column names
## The first row holds the variable names ("", X1, X2, X3); its empty first
## cell sits above the group labels, so that column is called 'Group'
variable_names <- unlist(mydata_new[1, -1], use.names = FALSE)
colnames(mydata_new) <- c("Group", variable_names)
## Remove first row (its content is now the header)
mydata_new <- mydata_new[-1, ]

# View cleaned 'mydata_new'
mydata_new

#> # A tibble: 9 x 4
#>   Group X1    X2    X3   
#>   <chr> <chr> <chr> <chr>
#> 1 A     100   600   500  
#> 2 A     200   300   300  
#> 3 A     250   400   200  
#> 4 M     200   300   200  
#> 5 M     230   550   200  
#> 6 M     400   750   100  
#> 7 U     400   800   500  
#> 8 U     100   900   400  
#> 9 U     200   540   600


# Summarise numeric data
mydata_new %>%
    # Convert all data columns from 'character' to 'numeric'
    mutate_at(vars(-Group),
              as.numeric) %>%
    # Group data by the grouping variable before summarising
    group_by(Group) %>%
    # Calculate MEAN and SD for each data column
    ## A named list replaces the deprecated funs(); the list names become
    ## the column suffixes (X1_MEAN, ..., X3_SD)
    summarise_at(vars(-Group),
                 list(MEAN = mean, SD = sd))

#> # A tibble: 3 x 7
#>   Group X1_MEAN X2_MEAN X3_MEAN X1_SD X2_SD X3_SD
#>   <chr>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl>
#> 1 A        183.    433.    333.  76.4  153. 153. 
#> 2 M        277.    533.    167. 108.   225.  57.7
#> 3 U        233.    747.    500  153.   186. 100



# Define function: coefficient of variation (cv = sd / mean)
# x: numeric vector; missing values are removed before sd and mean are taken.
# Returns sd(x) / mean(x).
coef_var <- function(x) {
    sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)
}

# Execute summary
mydata_new %>%
    # Convert all data columns from 'character' to 'numeric'
    mutate_at(vars(-Group),
              as.numeric) %>%
    # Group data by the grouping variable before summarising
    group_by(Group) %>%
    # Calculate summaries each data column
    ## Write the summary functions as formulas with a dummy "." argument so
    ## that additional arguments can be added to the called functions
    ## (e.g., adding na.rm = TRUE to cope with missing data)
    ## A named list of functions/formulas replaces the deprecated funs();
    ## see ?dplyr::summarise_at for details
    summarise_at(vars(-Group),
                 list(
                     # Mean
                     MEAN = ~ mean(., na.rm = TRUE),
                     # SD
                     SD = ~ sd(., na.rm = TRUE),
                     # Coefficient of variation
                     CV = coef_var,
                     # Add other summary stats as needed
                     # Median
                     MEDIAN = ~ median(., na.rm = TRUE),
                     # 25th percentile
                     Q25 = ~ quantile(., probs = 0.25, na.rm = TRUE),
                     # 75th percentile
                     Q75 = ~ quantile(., probs = 0.75, na.rm = TRUE),
                     # Minimum
                     min = ~ min(., na.rm = TRUE),
                     # Maximum
                     max = ~ max(., na.rm = TRUE)
                 ))

#> # A tibble: 3 x 25
#>   Group X1_MEAN X2_MEAN X3_MEAN X1_SD X2_SD X3_SD X1_CV X2_CV X3_CV
#>   <chr>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A        183.    433.    333.  76.4  153. 153.  0.417 0.353 0.458
#> 2 M        277.    533.    167. 108.   225.  57.7 0.390 0.423 0.346
#> 3 U        233.    747.    500  153.   186. 100   0.655 0.249 0.2  
#> # ... with 15 more variables: X1_MEDIAN <dbl>, X2_MEDIAN <dbl>,
#> #   X3_MEDIAN <dbl>, X1_Q25 <dbl>, X2_Q25 <dbl>, X3_Q25 <dbl>,
#> #   X1_Q75 <dbl>, X2_Q75 <dbl>, X3_Q75 <dbl>, X1_min <dbl>, X2_min <dbl>,
#> #   X3_min <dbl>, X1_max <dbl>, X2_max <dbl>, X3_max <dbl>

由 reprex 包(v0.2.0)创建于 2018-05-10。