我希望使用以下方法获得相等的长度输出:
sapply(df,摘要)
其中
df = data.frame(x = 1:10,y = rep(10:11,5),z = c(1:4,NA,NA,NA, 3:5))
sapply(dd, summary)
我得到了
$x
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 3.25 5.50 5.50 7.75 10.00
$y
Min. 1st Qu. Median Mean 3rd Qu. Max.
10.0 10.0 10.5 10.5 11.0 11.0
$z
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.000 2.500 3.000 3.143 4.000 5.000 3
我的问题是它为至少有一个NA
的列提供了NA
的长度,否则就没有了。我希望获得相等的长度输出,如果列没有丢失,最好NA
的数量为0。
我想得到类似的东西>
x y z
Min. 1.00 10.0 1.000
1st Qu. 3.25 10.0 2.500
Median 5.50 10.5 3.000
Mean 5.50 10.5 3.143
3rd Qu. 7.75 11.0 4.000
Max. 10.00 11.0 5.000
NA's 0 0 3
答案 0 :(得分:3)
这也可以。
> s <- sapply(df, summary)
> sapply(s, function(x){
if(!length(x)==7) x[7] <- 0; names(x)[7] <- "NA's"; x
})
# x y z
# Min. 1.00 10.0 1.000
# 1st Qu. 3.25 10.0 2.500
# Median 5.50 10.5 3.000
# Mean 5.50 10.5 3.143
# 3rd Qu. 7.75 11.0 4.000
# Max. 10.00 11.0 5.000
# NA's 0.00 0.0 3.000
答案 1 :(得分:2)
调整summary
功能(输入summary.default
查看):
mysummary <- function (object, ..., digits = max(3L, getOption("digits") -
3L))
{
if (is.factor(object))
return(summary.factor(object, ...))
else if (is.matrix(object))
return(summary.matrix(object, digits = digits, ...))
value <- if (is.logical(object))
c(Mode = "logical", {
tb <- table(object, exclude = NULL)
if (!is.null(n <- dimnames(tb)[[1L]]) && any(iN <- is.na(n))) dimnames(tb)[[1L]][iN] <- "NA's"
tb
})
else if (is.numeric(object)) {
nas <- is.na(object)
object <- object[!nas]
qq <- stats::quantile(object)
qq <- signif(c(qq[1L:3L], mean(object), qq[4L:5L]), digits)
names(qq) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.",
"Max.")
# if (any(nas))
c(qq, `NA's` = sum(nas))
# else qq
}
else if (is.recursive(object) && !is.language(object) &&
(n <- length(object))) {
sumry <- array("", c(n, 3L), list(names(object), c("Length",
"Class", "Mode")))
ll <- numeric(n)
for (i in 1L:n) {
ii <- object[[i]]
ll[i] <- length(ii)
cls <- oldClass(ii)
sumry[i, 2L] <- if (length(cls))
cls[1L]
else "-none-"
sumry[i, 3L] <- mode(ii)
}
sumry[, 1L] <- format(as.integer(ll))
sumry
}
else c(Length = length(object), Class = class(object), Mode = mode(object))
class(value) <- c("summaryDefault", "table")
value
}
并使用
lapply(df, mysummary)
或
sapply(df, mysummary)
答案 2 :(得分:2)
我只想做
temp <- sapply(df, function(x) summary(c(x, NA)))
temp[dim(temp)[1], ] <- temp[dim(temp)[1], ] - 1
x y z
Min. 1.00 10.0 1.000
1st Qu. 3.25 10.0 2.500
Median 5.50 10.5 3.000
Mean 5.50 10.5 3.143
3rd Qu. 7.75 11.0 4.000
Max. 10.00 11.0 5.000
NA's 0.00 0.0 3.000
答案 3 :(得分:0)
不使用循环:
s1 <- summary(df)
library(stringr)
#Extract numeric values with regular expression
val <- as.numeric(gsub(".*\\: ?","",s1))
val[is.na(val)] <- 0 #change NAs' to 0
#Extract string up to `:` and remove the leading and trailing space with ?str_trim
rowNm <- str_trim(gsub("^(.* ?)\\:.*","\\1",s1)[,3])
colNm <- str_trim(colnames(s1))
#Create matrix with extracted values
s2 <- matrix(val,7,3, dimnames=list(rowNm,colNm))
s2
x y z
Min. 1.00 10.0 1.000
1st Qu. 3.25 10.0 2.500
Median 5.50 10.5 3.000
Mean 5.50 10.5 3.143
3rd Qu. 7.75 11.0 4.000
Max. 10.00 11.0 5.000
NA's 0.00 0.0 3.000