摘要函数的输出

时间:2014-06-23 09:18:15

标签: r plyr summary

我希望使用以下方法获得相等的长度输出:

  sapply(df,摘要)

其中

  

df = data.frame(x = 1:10,y = rep(10:11,5),z = c(1:4,NA,NA,NA,   3:5))

sapply(dd, summary)我得到了

$x
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    3.25    5.50    5.50    7.75   10.00 
$y
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   10.0    10.0    10.5    10.5    11.0    11.0 
$z
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  1.000   2.500   3.000   3.143   4.000   5.000       3 

我的问题是它为至少有一个NA的列提供了NA的长度,否则就没有了。我希望获得相等的长度输出,如果列没有丢失,最好NA的数量为0。

我想得到类似的东西>

          x     y      z        
 Min.    1.00   10.0   1.000  
 1st Qu. 3.25   10.0   2.500  
 Median  5.50   10.5   3.000  
 Mean    5.50   10.5   3.143  
 3rd Qu. 7.75   11.0   4.000  
 Max.    10.00  11.0   5.000  
 NA's    0      0      3    

4 个答案:

答案 0 :(得分:3)

这也可以。

> s <- sapply(df, summary)
> sapply(s, function(x){
      if(!length(x)==7) x[7] <- 0; names(x)[7] <- "NA's"; x 
  })
#             x    y     z
# Min.     1.00 10.0 1.000
# 1st Qu.  3.25 10.0 2.500
# Median   5.50 10.5 3.000
# Mean     5.50 10.5 3.143
# 3rd Qu.  7.75 11.0 4.000
# Max.    10.00 11.0 5.000
# NA's     0.00  0.0 3.000

答案 1 :(得分:2)

调整summary功能(输入summary.default查看):

mysummary <- function (object, ..., digits = max(3L, getOption("digits") - 
                                                   3L)) 
{
  if (is.factor(object)) 
    return(summary.factor(object, ...))
  else if (is.matrix(object)) 
    return(summary.matrix(object, digits = digits, ...))
  value <- if (is.logical(object)) 
    c(Mode = "logical", {
      tb <- table(object, exclude = NULL)
      if (!is.null(n <- dimnames(tb)[[1L]]) && any(iN <- is.na(n))) dimnames(tb)[[1L]][iN] <- "NA's"
      tb
    })
  else if (is.numeric(object)) {
    nas <- is.na(object)
    object <- object[!nas]
    qq <- stats::quantile(object)
    qq <- signif(c(qq[1L:3L], mean(object), qq[4L:5L]), digits)
    names(qq) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", 
                   "Max.")
   # if (any(nas)) 
      c(qq, `NA's` = sum(nas))
   # else qq
  }
  else if (is.recursive(object) && !is.language(object) && 
             (n <- length(object))) {
    sumry <- array("", c(n, 3L), list(names(object), c("Length", 
                                                       "Class", "Mode")))
    ll <- numeric(n)
    for (i in 1L:n) {
      ii <- object[[i]]
      ll[i] <- length(ii)
      cls <- oldClass(ii)
      sumry[i, 2L] <- if (length(cls)) 
        cls[1L]
      else "-none-"
      sumry[i, 3L] <- mode(ii)
    }
    sumry[, 1L] <- format(as.integer(ll))
    sumry
  }
  else c(Length = length(object), Class = class(object), Mode = mode(object))
  class(value) <- c("summaryDefault", "table")
  value
}

并使用

lapply(df, mysummary)

sapply(df, mysummary)

答案 2 :(得分:2)

我只想做

temp <- sapply(df, function(x) summary(c(x, NA)))
temp[dim(temp)[1], ] <- temp[dim(temp)[1], ] - 1 

            x    y     z
Min.     1.00 10.0 1.000
1st Qu.  3.25 10.0 2.500
Median   5.50 10.5 3.000
Mean     5.50 10.5 3.143
3rd Qu.  7.75 11.0 4.000
Max.    10.00 11.0 5.000
NA's     0.00  0.0 3.000

答案 3 :(得分:0)

不使用循环:

s1 <- summary(df)
library(stringr)
#Extract numeric values with regular expression
val <- as.numeric(gsub(".*\\: ?","",s1))
val[is.na(val)] <- 0 #change NAs' to 0
#Extract string up to `:` and remove the leading and trailing space with ?str_trim
rowNm <- str_trim(gsub("^(.* ?)\\:.*","\\1",s1)[,3])
colNm <-  str_trim(colnames(s1))
#Create matrix with extracted values
s2 <- matrix(val,7,3, dimnames=list(rowNm,colNm))
 s2
            x    y     z
Min.     1.00 10.0 1.000
1st Qu.  3.25 10.0 2.500
Median   5.50 10.5 3.000
Mean     5.50 10.5 3.143
3rd Qu.  7.75 11.0 4.000
Max.    10.00 11.0 5.000
NA's     0.00  0.0 3.000