我正在寻找一种方法,为函数中的不同变量使用不同的比例。
这是来自A simpler way to achieve a frequency count with mean, sum, length and sd in R
的后续问题鉴于
# create the summary function
summaryStatistics <- function(x,levels) {
xx <- na.omit(x)
c(table(factor(x, levels=levels), useNA='always', exclude=NULL),
sum=sum(xx),
length=length(x),
mean=mean(xx),
standard.deviation=sqrt(var(xx)),
var=(var(xx)),
median=median(xx),
min=min(xx),
max=max(xx),
quantile=quantile(xx),
skew=sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x) ,
kurtosis=sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3
)
}
# create the test data frame
Id <- c(1,2,3,4,5,6,7,8,9,10)
ClassA <- c(1,NA,3,1,1,2,1,4,5,3)
ClassB <- c(2,1,1,3,3,2,1,1,3,3)
R <- c(1,2,3,NA,9,2,4,5,6,7)
S <- c(3,7,NA,9,5,8,7,NA,7,6)
df <- data.frame(Id,ClassA,ClassB,R,S)
ClassAAnswers <- c(1:5,NA)
ClassBAnswers <- c(1:5,NA)
RAnswers <- c(0:10,NA);
SAnswers <- c(0:20,NA);
# create the result
result <- setNames(
nm=c('answer','question','value'),
as.data.frame(
as.table(
simplify2array(
lapply(
df[c('R', 'S')],
summaryStatistics,
RAnswers
)
)
)
)
)
# change the order to question, answer, value
result <- result[, c(2, 1, 3)]
# add the filter
result <- cbind(filter='None',result)
# return the result
result
我得到了
filter question answer value
1 None R 0 0.0000000
2 None R 1 1.0000000
3 None R 2 2.0000000
4 None R 3 1.0000000
5 None R 4 1.0000000
6 None R 5 1.0000000
7 None R 6 1.0000000
8 None R 7 1.0000000
9 None R 8 0.0000000
10 None R 9 1.0000000
11 None R 10 0.0000000
12 None R <NA> 1.0000000
13 None R sum 39.0000000
14 None R length 10.0000000
15 None R mean 4.3333333
16 None R standard.deviation 2.6457513
17 None R var 7.0000000
18 None R median 4.0000000
19 None R min 1.0000000
20 None R max 9.0000000
21 None R quantile.0% 1.0000000
22 None R quantile.25% 2.0000000
23 None R quantile.50% 4.0000000
24 None R quantile.75% 6.0000000
25 None R quantile.100% 9.0000000
26 None R skew 0.3275692
27 None R kurtosis -1.5333333
28 None S 0 0.0000000
29 None S 1 0.0000000
30 None S 2 0.0000000
31 None S 3 1.0000000
32 None S 4 0.0000000
33 None S 5 1.0000000
34 None S 6 1.0000000
35 None S 7 3.0000000
36 None S 8 1.0000000
37 None S 9 1.0000000
38 None S 10 0.0000000
39 None S <NA> 2.0000000
40 None S sum 52.0000000
41 None S length 10.0000000
42 None S mean 6.5000000
43 None S standard.deviation 1.8516402
44 None S var 3.4285714
45 None S median 7.0000000
46 None S min 3.0000000
47 None S max 9.0000000
48 None S quantile.0% 3.0000000
49 None S quantile.25% 5.7500000
50 None S quantile.50% 7.0000000
51 None S quantile.75% 7.2500000
52 None S quantile.100% 9.0000000
53 None S skew -0.4252986
54 None S kurtosis -1.3028646
S的答案从0到10缩放。
我认为关键是拉扯。
lapply(df[c('R', 'S')], summaryStatistics, c(0:20))
为R和S生成缩放0到20的结果。
lapply(df[c('R', 'S')], summaryStatistics, c(0:10))
为R和S生成从0到10的结果。
lapply(df[c('R', 'S')], summaryStatistics, c(0:20,0:10))
在第一个比例中给出结果,在第二个比例中给出结果,但有一些警告。
警告讯息:
1:在levels<-
(*tmp*
,值= if(nl == nL)as.character(标签),否则粘贴0(标签,:
不推荐使用因子中的重复级别
2:在levels<-
(*tmp*
,值= if(nl == nL)as.character(标签),否则粘贴0(标签,:
不推荐使用因子中的重复级别
3:在levels<-
(*tmp*
,值= if(nl == nL)as.character(标签),否则为paste0(标签,:
不推荐使用因子中的重复级别
4:在levels<-
(*tmp*
,值= if(nl == nL)as.character(标签),否则粘贴0(标签,:
不推荐使用因子中的重复级别
如何更改摘要功能,以便我可以传递R的比例和S的比例,并为每个变量获得一组比例结果?
答案 0 :(得分:1)
这是调整原始功能的非常粗略的尝试:
summaryStatistics <- function(df, a, b, levels1, levels2) {
x <- df[,a]
y <- df[,b]
xx <- na.omit(x)
yy <- na.omit(y)
levels2 <- levels2[levels2 != 0]
answer1 <- c(levels1, "<NA>", "sum", "length", "mean", "standard.deviation", "var", "median", "min", "max", "quantile.0", "quantile.25", "quantile.50", "quantile.75", "quantile.100", "skew", "kurtosis")
value1 <- c(as.numeric(table(factor(x, levels1))), nrow(df[is.na(x)==T,]), sum(xx), length(x), mean(xx), sqrt(var(xx)), (var(xx)), median(xx), min(xx), max(xx), as.numeric(quantile(xx)), sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x), sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3)
answer2 <- c(levels2, "<NA>", "sum", "length", "mean", "standard.deviation", "var", "median", "min", "max", "quantile.0", "quantile.25", "quantile.50", "quantile.75", "quantile.100", "skew", "kurtosis")
value2 <- c(as.numeric(table(factor(y, levels2))), nrow(df[is.na(y)==T,]), sum(yy), length(y), mean(yy), sqrt(var(yy)), (var(yy)), median(yy), min(yy), max(yy), as.numeric(quantile(yy)), sum((yy-mean(yy))^3/sqrt(var(yy))^3)/length(y), sum((yy-mean(yy))^4/sqrt(var(yy))^4)/length(y) - 3)
answer <- c(answer1, answer2)
question <- c(rep(a, length(answer1)), rep(b, length(answer2)))
value <- c(value1, value2)
result <- data.frame(answer, question, value)
return(result)
}
用法是:
summaryStatistics(df, 'R', 'S', c(0:10), c(0:20))
它很难看,但最终结果也是如此:)
答案 1 :(得分:0)
我找到了一个解决方法,我为每对问题和可能的答案调用了汇总函数。
summaryStatistics <- function(x,levels) {
xx <- na.omit(x)
c(table(factor(x, levels=levels), useNA='always', exclude=NULL),
sum=sum(xx),
length=length(x),
mean=mean(xx),
standard.deviation=sqrt(var(xx)),
var=(var(xx)),
median=median(xx),
min=min(xx),
max=max(xx),
quantile=quantile(xx),
skew=sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x) ,
kurtosis=sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3
)
}
# create the test data frame
Id <- c(1,2,3,4,5,6,7,8,9,10)
ClassA <- c(1,NA,3,1,1,2,1,4,5,3)
ClassB <- c(2,1,1,3,3,2,1,1,3,3)
R <- c(1,2,3,NA,9,2,4,5,6,7)
S <- c(3,7,NA,9,5,8,7,NA,7,6)
W <- c(4,5,6,7,2,4,5,6,7,8)
df <- data.frame(Id,ClassA,ClassB,R,S,W)
ClassAAnswers <- c(1:5,NA)
ClassBAnswers <- c(1:5,NA)
RAnswers <- c(0:10,NA);
SAnswers <- c(0:20,NA);
WAnswers <- c(0:30,NA);
# create the result
result <- setNames(
nm=c('answer','question','value'),
as.data.frame(
as.table(
simplify2array(
lapply(
df[c('R')],
summaryStatistics,
RAnswers
)
)
)
)
)
result <- rbind( result,
setNames(
nm=c('answer','question','value'),
as.data.frame(
as.table(
simplify2array(
lapply(
df[c('S')],
summaryStatistics,
SAnswers
)
)
)
)
)
)
result <- rbind( result,
setNames(
nm=c('answer','question','value'),
as.data.frame(
as.table(
simplify2array(
lapply(
df[c('W')],
summaryStatistics,
WAnswers
)
)
)
)
)
)
# change the order to question, answer, value
result <- result[, c(2, 1, 3)]
# add the filter
result <- cbind(filter='None',result)
# return the result
result
返回
filter question answer value
1 None R 0 0.0000000
2 None R 1 1.0000000
3 None R 2 2.0000000
4 None R 3 1.0000000
5 None R 4 1.0000000
6 None R 5 1.0000000
7 None R 6 1.0000000
8 None R 7 1.0000000
9 None R 8 0.0000000
10 None R 9 1.0000000
11 None R 10 0.0000000
12 None R <NA> 1.0000000
13 None R sum 39.0000000
14 None R length 10.0000000
15 None R mean 4.3333333
16 None R standard.deviation 2.6457513
17 None R var 7.0000000
18 None R median 4.0000000
19 None R min 1.0000000
20 None R max 9.0000000
21 None R quantile.0% 1.0000000
22 None R quantile.25% 2.0000000
23 None R quantile.50% 4.0000000
24 None R quantile.75% 6.0000000
25 None R quantile.100% 9.0000000
26 None R skew 0.3275692
27 None R kurtosis -1.5333333
28 None S 0 0.0000000
29 None S 1 0.0000000
30 None S 2 0.0000000
31 None S 3 1.0000000
32 None S 4 0.0000000
33 None S 5 1.0000000
34 None S 6 1.0000000
35 None S 7 3.0000000
36 None S 8 1.0000000
37 None S 9 1.0000000
38 None S 10 0.0000000
39 None S 11 0.0000000
40 None S 12 0.0000000
41 None S 13 0.0000000
42 None S 14 0.0000000
43 None S 15 0.0000000
44 None S 16 0.0000000
45 None S 17 0.0000000
46 None S 18 0.0000000
47 None S 19 0.0000000
48 None S 20 0.0000000
49 None S <NA> 2.0000000
50 None S sum 52.0000000
51 None S length 10.0000000
52 None S mean 6.5000000
53 None S standard.deviation 1.8516402
54 None S var 3.4285714
55 None S median 7.0000000
56 None S min 3.0000000
57 None S max 9.0000000
58 None S quantile.0% 3.0000000
59 None S quantile.25% 5.7500000
60 None S quantile.50% 7.0000000
61 None S quantile.75% 7.2500000
62 None S quantile.100% 9.0000000
63 None S skew -0.4252986
64 None S kurtosis -1.3028646
65 None W 0 0.0000000
66 None W 1 0.0000000
67 None W 2 1.0000000
68 None W 3 0.0000000
69 None W 4 2.0000000
70 None W 5 2.0000000
71 None W 6 2.0000000
72 None W 7 2.0000000
73 None W 8 1.0000000
74 None W 9 0.0000000
75 None W 10 0.0000000
76 None W 11 0.0000000
77 None W 12 0.0000000
78 None W 13 0.0000000
79 None W 14 0.0000000
80 None W 15 0.0000000
81 None W 16 0.0000000
82 None W 17 0.0000000
83 None W 18 0.0000000
84 None W 19 0.0000000
85 None W 20 0.0000000
86 None W 21 0.0000000
87 None W 22 0.0000000
88 None W 23 0.0000000
89 None W 24 0.0000000
90 None W 25 0.0000000
91 None W 26 0.0000000
92 None W 27 0.0000000
93 None W 28 0.0000000
94 None W 29 0.0000000
95 None W 30 0.0000000
96 None W <NA> 0.0000000
97 None W sum 54.0000000
98 None W length 10.0000000
99 None W mean 5.4000000
100 None W standard.deviation 1.7763883
101 None W var 3.1555556
102 None W median 5.5000000
103 None W min 2.0000000
104 None W max 8.0000000
105 None W quantile.0% 2.0000000
106 None W quantile.25% 4.2500000
107 None W quantile.50% 5.5000000
108 None W quantile.75% 6.7500000
109 None W quantile.100% 8.0000000
110 None W skew -0.3339582
111 None W kurtosis -0.9871315
这就是我要找的。 p>
答案 2 :(得分:0)
我最终使用另一个函数来对summary函数进行多次调用。
来自Extracting a vector from a list for a R function的新代码
# create the summary function
summaryStatistics <- function(x,levels) {
xx <- na.omit(x)
c(table(factor(x, levels=levels), useNA='always', exclude=NULL),
sum=sum(xx),
length=length(x),
mean=mean(xx),
standard.deviation=sqrt(var(xx)),
var=(var(xx)),
median=median(xx),
min=min(xx),
max=max(xx),
quantile=quantile(xx),
skew=sum((xx-mean(xx))^3/sqrt(var(xx))^3)/length(x) ,
kurtosis=sum((xx-mean(xx))^4/sqrt(var(xx))^4)/length(x) - 3
)
}
# create the function that steps through the summary function
extractSummaryDataframe <- function( questions.dataframe, answers.list, filter) {
result <- data.frame(
answer=factor(),
question=factor(),
value=double()
) ;
listIndex <- 0 ;
for ( name in names(questions.dataframe)){
listIndex <- listIndex + 1 ;
result <- rbind( result,
setNames(
nm=c('answer','question','value'),
as.data.frame(
as.table(
simplify2array(
lapply(
questions.dataframe[c(name)],
summaryStatistics,
answers.list[[listIndex]]
)
)
)
)
)
)
}
result <- result[, c(2, 1, 3)] ;
result <- cbind(filter=filter,result) ;
result
}
# create the test data frame
Id <- c(1,2,3,4,5,6,7,8,9,10)
ClassA <- c(1,NA,3,1,1,2,1,4,5,3)
ClassB <- c(2,1,1,3,3,2,1,1,3,3)
R <- c(1,2,3,NA,9,2,4,5,6,7)
S <- c(3,7,NA,9,5,8,7,NA,7,6)
W <- c(4,5,6,7,2,4,5,6,7,8)
df <- data.frame(Id,ClassA,ClassB,R,S,W)
ClassAAnswers <- c(1:5,NA)
ClassBAnswers <- c(1:5,NA)
RAnswers <- c(0:10,NA);
SAnswers <- c(0:20,NA);
WAnswers <- c(0:30,NA);
answers.list <- list(RAnswers,SAnswers,WAnswers);
RSW.df <- df[c('R','S','W')];
# create the result
result <- extractSummaryDataframe( RSW.df, answers.list, 'None') ;
# return the result
result
返回
filter question answer value
1 None R 0 0.0000000
2 None R 1 1.0000000
3 None R 2 2.0000000
4 None R 3 1.0000000
5 None R 4 1.0000000
6 None R 5 1.0000000
7 None R 6 1.0000000
8 None R 7 1.0000000
9 None R 8 0.0000000
10 None R 9 1.0000000
11 None R 10 0.0000000
12 None R <NA> 1.0000000
13 None R sum 39.0000000
14 None R length 10.0000000
15 None R mean 4.3333333
16 None R standard.deviation 2.6457513
17 None R var 7.0000000
18 None R median 4.0000000
19 None R min 1.0000000
20 None R max 9.0000000
21 None R quantile.0% 1.0000000
22 None R quantile.25% 2.0000000
23 None R quantile.50% 4.0000000
24 None R quantile.75% 6.0000000
25 None R quantile.100% 9.0000000
26 None R skew 0.3275692
27 None R kurtosis -1.5333333
28 None S 0 0.0000000
29 None S 1 0.0000000
30 None S 2 0.0000000
31 None S 3 1.0000000
32 None S 4 0.0000000
33 None S 5 1.0000000
34 None S 6 1.0000000
35 None S 7 3.0000000
36 None S 8 1.0000000
37 None S 9 1.0000000
38 None S 10 0.0000000
39 None S 11 0.0000000
40 None S 12 0.0000000
41 None S 13 0.0000000
42 None S 14 0.0000000
43 None S 15 0.0000000
44 None S 16 0.0000000
45 None S 17 0.0000000
46 None S 18 0.0000000
47 None S 19 0.0000000
48 None S 20 0.0000000
49 None S <NA> 2.0000000
50 None S sum 52.0000000
51 None S length 10.0000000
52 None S mean 6.5000000
53 None S standard.deviation 1.8516402
54 None S var 3.4285714
55 None S median 7.0000000
56 None S min 3.0000000
57 None S max 9.0000000
58 None S quantile.0% 3.0000000
59 None S quantile.25% 5.7500000
60 None S quantile.50% 7.0000000
61 None S quantile.75% 7.2500000
62 None S quantile.100% 9.0000000
63 None S skew -0.4252986
64 None S kurtosis -1.3028646
65 None W 0 0.0000000
66 None W 1 0.0000000
67 None W 2 1.0000000
68 None W 3 0.0000000
69 None W 4 2.0000000
70 None W 5 2.0000000
71 None W 6 2.0000000
72 None W 7 2.0000000
73 None W 8 1.0000000
74 None W 9 0.0000000
75 None W 10 0.0000000
76 None W 11 0.0000000
77 None W 12 0.0000000
78 None W 13 0.0000000
79 None W 14 0.0000000
80 None W 15 0.0000000
81 None W 16 0.0000000
82 None W 17 0.0000000
83 None W 18 0.0000000
84 None W 19 0.0000000
85 None W 20 0.0000000
86 None W 21 0.0000000
87 None W 22 0.0000000
88 None W 23 0.0000000
89 None W 24 0.0000000
90 None W 25 0.0000000
91 None W 26 0.0000000
92 None W 27 0.0000000
93 None W 28 0.0000000
94 None W 29 0.0000000
95 None W 30 0.0000000
96 None W <NA> 0.0000000
97 None W sum 54.0000000
98 None W length 10.0000000
99 None W mean 5.4000000
100 None W standard.deviation 1.7763883
101 None W var 3.1555556
102 None W median 5.5000000
103 None W min 2.0000000
104 None W max 8.0000000
105 None W quantile.0% 2.0000000
106 None W quantile.25% 4.2500000
107 None W quantile.50% 5.5000000
108 None W quantile.75% 6.7500000
109 None W quantile.100% 8.0000000
110 None W skew -0.3339582
111 None W kurtosis -0.9871315
这正是我所寻找的: - )。