我正在为报告创建摘要数据框。我已经能够手动创建我想要的数据框了。我正在创建一个函数来简化结果的创建。
手动流程
# create the summary function
#
# Tabulate how often each allowed answer occurs in `x` and append descriptive
# statistics, returning everything as a single named numeric vector.
#
# Arguments:
#   x       numeric vector of responses; may contain NA
#   levels  allowed answer values, used as the factor levels for the counts
#
# Value (in this order): the counts from table() -- one per factor level plus
# the count of missing values -- followed by sum, length, mean,
# standard.deviation, var, median, min, max, the default quantiles
# (named quantile.0% ... quantile.100%), skew and kurtosis.
# Every statistic except `length` is computed on the non-missing values.
# NOTE(review): skew and kurtosis divide by length(x), which still counts the
# NA entries, rather than by the number of non-missing values -- confirm that
# this is intended.
summaryStatistics <- function(x, levels) {
  observed <- na.omit(x)
  n.all <- length(x)
  centre <- mean(observed)
  variance <- var(observed)
  std.dev <- sqrt(variance)

  # k-th standardised moment, averaged over the full length of x
  moment <- function(k) sum((observed - centre)^k / std.dev^k) / n.all

  # useNA = 'always' guarantees a trailing NA count even when nothing is missing
  counts <- table(factor(x, levels = levels), useNA = 'always', exclude = NULL)

  c(
    counts,
    sum = sum(observed),
    length = n.all,
    mean = centre,
    standard.deviation = std.dev,
    var = variance,
    median = median(observed),
    min = min(observed),
    max = max(observed),
    quantile = quantile(observed),
    skew = moment(3),
    kurtosis = moment(4) - 3
  )
}
# create the test data frame
# Ten rows of example responses; NA is a missing value.
Id <- as.numeric(1:10)
ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
R <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
S <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
W <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)
df <- data.frame(Id = Id, ClassA = ClassA, ClassB = ClassB, R = R, S = S, W = W)

# Allowed answers for each question; every set ends with NA.
ClassAAnswers <- c(1:5, NA)
ClassBAnswers <- c(1:5, NA)
RAnswers <- c(0:10, NA)
SAnswers <- c(0:20, NA)
WAnswers <- c(0:30, NA)

# Answer sets listed in the same order as the columns of RSW.df
answers.list <- list(RAnswers, SAnswers, WAnswers)
RSW.df <- df[c('R', 'S', 'W')]
# create the result
# Each question column is summarised against its own answer set, reshaped to
# long form (answer, question, value), and the three pieces are stacked in the
# order R, S, W.
result <- rbind(
  setNames(
    as.data.frame(as.table(simplify2array(
      lapply(df['R'], summaryStatistics, RAnswers)
    ))),
    c('answer', 'question', 'value')
  ),
  setNames(
    as.data.frame(as.table(simplify2array(
      lapply(df['S'], summaryStatistics, SAnswers)
    ))),
    c('answer', 'question', 'value')
  ),
  setNames(
    as.data.frame(as.table(simplify2array(
      lapply(df['W'], summaryStatistics, WAnswers)
    ))),
    c('answer', 'question', 'value')
  )
)
# change the order to question, answer, value
result <- result[, c('question', 'answer', 'value')]
# add the filter
result <- cbind(filter = 'None', result)
# return the result
result
产生结果
filter question answer value
1 None R 0 0.0000000
2 None R 1 1.0000000
3 None R 2 2.0000000
4 None R 3 1.0000000
5 None R 4 1.0000000
6 None R 5 1.0000000
7 None R 6 1.0000000
8 None R 7 1.0000000
9 None R 8 0.0000000
10 None R 9 1.0000000
11 None R 10 0.0000000
12 None R <NA> 1.0000000
13 None R sum 39.0000000
14 None R length 10.0000000
15 None R mean 4.3333333
16 None R standard.deviation 2.6457513
17 None R var 7.0000000
18 None R median 4.0000000
19 None R min 1.0000000
20 None R max 9.0000000
21 None R quantile.0% 1.0000000
22 None R quantile.25% 2.0000000
23 None R quantile.50% 4.0000000
24 None R quantile.75% 6.0000000
25 None R quantile.100% 9.0000000
26 None R skew 0.3275692
27 None R kurtosis -1.5333333
28 None S 0 0.0000000
29 None S 1 0.0000000
30 None S 2 0.0000000
31 None S 3 1.0000000
32 None S 4 0.0000000
33 None S 5 1.0000000
34 None S 6 1.0000000
35 None S 7 3.0000000
36 None S 8 1.0000000
37 None S 9 1.0000000
38 None S 10 0.0000000
39 None S 11 0.0000000
40 None S 12 0.0000000
41 None S 13 0.0000000
42 None S 14 0.0000000
43 None S 15 0.0000000
44 None S 16 0.0000000
45 None S 17 0.0000000
46 None S 18 0.0000000
47 None S 19 0.0000000
48 None S 20 0.0000000
49 None S <NA> 2.0000000
50 None S sum 52.0000000
51 None S length 10.0000000
52 None S mean 6.5000000
53 None S standard.deviation 1.8516402
54 None S var 3.4285714
55 None S median 7.0000000
56 None S min 3.0000000
57 None S max 9.0000000
58 None S quantile.0% 3.0000000
59 None S quantile.25% 5.7500000
60 None S quantile.50% 7.0000000
61 None S quantile.75% 7.2500000
62 None S quantile.100% 9.0000000
63 None S skew -0.4252986
64 None S kurtosis -1.3028646
65 None W 0 0.0000000
66 None W 1 0.0000000
67 None W 2 1.0000000
68 None W 3 0.0000000
69 None W 4 2.0000000
70 None W 5 2.0000000
71 None W 6 2.0000000
72 None W 7 2.0000000
73 None W 8 1.0000000
74 None W 9 0.0000000
75 None W 10 0.0000000
76 None W 11 0.0000000
77 None W 12 0.0000000
78 None W 13 0.0000000
79 None W 14 0.0000000
80 None W 15 0.0000000
81 None W 16 0.0000000
82 None W 17 0.0000000
83 None W 18 0.0000000
84 None W 19 0.0000000
85 None W 20 0.0000000
86 None W 21 0.0000000
87 None W 22 0.0000000
88 None W 23 0.0000000
89 None W 24 0.0000000
90 None W 25 0.0000000
91 None W 26 0.0000000
92 None W 27 0.0000000
93 None W 28 0.0000000
94 None W 29 0.0000000
95 None W 30 0.0000000
96 None W <NA> 0.0000000
97 None W sum 54.0000000
98 None W length 10.0000000
99 None W mean 5.4000000
100 None W standard.deviation 1.7763883
101 None W var 3.1555556
102 None W median 5.5000000
103 None W min 2.0000000
104 None W max 8.0000000
105 None W quantile.0% 2.0000000
106 None W quantile.25% 4.2500000
107 None W quantile.50% 5.5000000
108 None W quantile.75% 6.7500000
109 None W quantile.100% 8.0000000
110 None W skew -0.3339582
111 None W kurtosis -0.9871315
这正是我想要的结果。
我创建了一个函数来逐列遍历数据框及其对应的可选答案。如果我把答案向量硬编码进去,得到的结果与上面的结果一致。
# Build a long-format summary (filter, answer, question, value) with one group
# of rows per column of `questions.dataframe`.
#
# Arguments:
#   questions.dataframe  data frame whose columns are summarised one by one
#   answers.list         accepted but not used in this version; every column
#                        is tabulated against the fixed levels c(0:10, NA)
#   filter               value placed in the leading `filter` column
extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
  long.names <- c('answer', 'question', 'value')

  # Summarise one column and reshape it into answer/question/value rows
  summarise.question <- function(name) {
    stats <- lapply(questions.dataframe[name], summaryStatistics, c(0:10, NA))
    setNames(as.data.frame(as.table(simplify2array(stats))), long.names)
  }

  # Fold over the column names, appending each summary to the rows so far
  empty <- data.frame(answer = factor(), question = factor(), value = double())
  stacked <- Reduce(
    function(so.far, name) rbind(so.far, summarise.question(name)),
    names(questions.dataframe),
    empty
  )

  cbind(filter = filter, stacked)
}
extractSummaryDataframe(RSW.df, answers.list, filter = 'None')
返回
filter answer question value
1 None 0 R 0.0000000
2 None 1 R 1.0000000
3 None 2 R 2.0000000
4 None 3 R 1.0000000
5 None 4 R 1.0000000
6 None 5 R 1.0000000
7 None 6 R 1.0000000
8 None 7 R 1.0000000
9 None 8 R 0.0000000
10 None 9 R 1.0000000
11 None 10 R 0.0000000
12 None <NA> R 1.0000000
13 None sum R 39.0000000
14 None length R 10.0000000
15 None mean R 4.3333333
16 None standard.deviation R 2.6457513
17 None var R 7.0000000
18 None median R 4.0000000
19 None min R 1.0000000
20 None max R 9.0000000
21 None quantile.0% R 1.0000000
22 None quantile.25% R 2.0000000
23 None quantile.50% R 4.0000000
24 None quantile.75% R 6.0000000
25 None quantile.100% R 9.0000000
26 None skew R 0.3275692
27 None kurtosis R -1.5333333
28 None 0 S 0.0000000
29 None 1 S 0.0000000
30 None 2 S 0.0000000
31 None 3 S 1.0000000
32 None 4 S 0.0000000
33 None 5 S 1.0000000
34 None 6 S 1.0000000
35 None 7 S 3.0000000
36 None 8 S 1.0000000
37 None 9 S 1.0000000
38 None 10 S 0.0000000
39 None <NA> S 2.0000000
40 None sum S 52.0000000
41 None length S 10.0000000
42 None mean S 6.5000000
43 None standard.deviation S 1.8516402
44 None var S 3.4285714
45 None median S 7.0000000
46 None min S 3.0000000
47 None max S 9.0000000
48 None quantile.0% S 3.0000000
49 None quantile.25% S 5.7500000
50 None quantile.50% S 7.0000000
51 None quantile.75% S 7.2500000
52 None quantile.100% S 9.0000000
53 None skew S -0.4252986
54 None kurtosis S -1.3028646
55 None 0 W 0.0000000
56 None 1 W 0.0000000
57 None 2 W 1.0000000
58 None 3 W 0.0000000
59 None 4 W 2.0000000
60 None 5 W 2.0000000
61 None 6 W 2.0000000
62 None 7 W 2.0000000
63 None 8 W 1.0000000
64 None 9 W 0.0000000
65 None 10 W 0.0000000
66 None <NA> W 0.0000000
67 None sum W 54.0000000
68 None length W 10.0000000
69 None mean W 5.4000000
70 None standard.deviation W 1.7763883
71 None var W 3.1555556
72 None median W 5.5000000
73 None min W 2.0000000
74 None max W 8.0000000
75 None quantile.0% W 2.0000000
76 None quantile.25% W 4.2500000
77 None quantile.50% W 5.5000000
78 None quantile.75% W 6.7500000
79 None quantile.100% W 8.0000000
80 None skew W -0.3339582
81 None kurtosis W -0.9871315
但是,如果我在遍历各列名称时改用对应的列表项,就像这样:
extractSummaryDataframe <- function( questions.dataframe, answers.list, filter) {
# Start from an empty answer/question/value frame that each column's summary
# is appended to.
result <- data.frame(
answer=factor(),
question=factor(),
value=double()
) ;
# listIndex is the position in answers.list that goes with the current column
listIndex <- 0 ;
for ( name in names(questions.dataframe)){
listIndex <- listIndex + 1 ;
result <- rbind( result,
setNames(
nm=c('answer','question','value'),
as.data.frame(
as.table(
simplify2array(
lapply(
questions.dataframe[c(name)],
summaryStatistics,
# NOTE(review): single brackets return a one-element list, not the vector
# stored in it, so summaryStatistics receives a list as `levels`;
# answers.list[[listIndex]] extracts the vector itself.
answers.list[listIndex]
)
)
)
)
)
)
}
# Prepend the filter label as the first column
result <- cbind(filter=filter,result) ;
result
}
# Run the variant above on the R, S and W columns
extractSummaryDataframe( RSW.df, answers.list, 'None')
filter
1 None
2 None
3 None
4 None
5 None
6 None
7 None
8 None
9 None
10 None
11 None
12 None
13 None
14 None
15 None
16 None
17 None
18 None
19 None
20 None
21 None
22 None
23 None
24 None
25 None
26 None
27 None
28 None
29 None
30 None
31 None
32 None
33 None
34 None
35 None
36 None
37 None
38 None
39 None
40 None
41 None
42 None
43 None
44 None
45 None
46 None
47 None
48 None
49 None
50 None
51 None
answer
1 c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NA)
2 <NA>
3 sum
4 length
5 mean
6 standard.deviation
7 var
8 median
9 min
10 max
11 quantile.0%
12 quantile.25%
13 quantile.50%
14 quantile.75%
15 quantile.100%
16 skew
17 kurtosis
18 c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, NA)
19 <NA>
20 sum
21 length
22 mean
23 standard.deviation
24 var
25 median
26 min
27 max
28 quantile.0%
29 quantile.25%
30 quantile.50%
31 quantile.75%
32 quantile.100%
33 skew
34 kurtosis
35 c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, NA)
36 <NA>
37 sum
38 length
39 mean
40 standard.deviation
41 var
42 median
43 min
44 max
45 quantile.0%
46 quantile.25%
47 quantile.50%
48 quantile.75%
49 quantile.100%
50 skew
51 kurtosis
question value
1 R 0.0000000
2 R 10.0000000
3 R 39.0000000
4 R 10.0000000
5 R 4.3333333
6 R 2.6457513
7 R 7.0000000
8 R 4.0000000
9 R 1.0000000
10 R 9.0000000
11 R 1.0000000
12 R 2.0000000
13 R 4.0000000
14 R 6.0000000
15 R 9.0000000
16 R 0.3275692
17 R -1.5333333
18 S 0.0000000
19 S 10.0000000
20 S 52.0000000
21 S 10.0000000
22 S 6.5000000
23 S 1.8516402
24 S 3.4285714
25 S 7.0000000
26 S 3.0000000
27 S 9.0000000
28 S 3.0000000
29 S 5.7500000
30 S 7.0000000
31 S 7.2500000
32 S 9.0000000
33 S -0.4252986
34 S -1.3028646
35 W 0.0000000
36 W 10.0000000
37 W 54.0000000
38 W 10.0000000
39 W 5.4000000
40 W 1.7763883
41 W 3.1555556
42 W 5.5000000
43 W 2.0000000
44 W 8.0000000
45 W 2.0000000
46 W 4.2500000
47 W 5.5000000
48 W 6.7500000
49 W 8.0000000
50 W -0.3339582
51 W -0.9871315
这与我正在寻找的结果完全不同。
将answers.list [1]识别为c(0:10,NA)的语法是什么?
答案 0 :(得分:0)
我遇到的语法错误在于:answers.list[listIndex] 需要写成 answers.list[[listIndex]]。单方括号返回的仍然是只含一个元素的列表,双方括号才会取出其中的向量本身。
我是通过 class 函数发现这一点的。
class(answers.list) returned list: expected.
class(answers.list[1]) returned list: unexpected.
class(answers.list[[1]]) returned integer: which is what I was looking for.
新代码
# create the summary function
#
# Count the occurrences of each allowed answer in `x` and append descriptive
# statistics, returning one named numeric vector.
#
# Arguments:
#   x       numeric vector of responses; may contain NA
#   levels  allowed answer values, used as the factor levels for the counts
#
# Value (in this order): the counts from table() -- one per factor level plus
# the count of missing values -- then sum, length, mean, standard.deviation,
# var, median, min, max, the default quantiles (quantile.0% ... quantile.100%),
# skew and kurtosis. Apart from `length`, the statistics use only the
# non-missing values.
# NOTE(review): skew and kurtosis are divided by length(x), which includes the
# NA entries, not by the number of non-missing values -- confirm intent.
summaryStatistics <- function(x, levels) {
  present <- na.omit(x)
  total.length <- length(x)
  avg <- mean(present)
  spread <- var(present)
  root.spread <- sqrt(spread)

  # Sum of standardised deviations raised to `power`, over the full length of x
  scaled.moment <- function(power) {
    sum((present - avg)^power / root.spread^power) / total.length
  }

  # useNA = 'always' keeps an NA count in the table even when it is zero
  answer.counts <- table(
    factor(x, levels = levels),
    useNA = 'always',
    exclude = NULL
  )

  c(
    answer.counts,
    sum = sum(present),
    length = total.length,
    mean = avg,
    standard.deviation = root.spread,
    var = spread,
    median = median(present),
    min = min(present),
    max = max(present),
    quantile = quantile(present),
    skew = scaled.moment(3),
    kurtosis = scaled.moment(4) - 3
  )
}
# create the function that steps through the summary function
#
# Build a long-format summary with the columns filter, question, answer and
# value: one group of rows per column of `questions.dataframe`, produced by
# summaryStatistics() against that column's own set of allowed answers.
#
# Arguments:
#   questions.dataframe  data frame whose columns are summarised one by one
#   answers.list         list of answer vectors, matched to the columns of
#                        `questions.dataframe` by position
#   filter               value placed in the leading `filter` column
#
# Value: a data frame; when `questions.dataframe` has no columns, a zero-row
# data frame with the same four columns.
# Errors: stops when `answers.list` has fewer elements than there are columns.
extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
  n.questions <- length(names(questions.dataframe))
  if (length(answers.list) < n.questions) {
    stop(
      'answers.list must supply one vector of answers per question column',
      call. = FALSE
    )
  }

  # Summarise the i-th column and reshape it into answer/question/value rows
  summarise.question <- function(i) {
    stats <- lapply(
      questions.dataframe[i],
      summaryStatistics,
      # [[ ]] extracts the answer vector itself; [ ] would pass a one-element list
      answers.list[[i]]
    )
    setNames(
      as.data.frame(as.table(simplify2array(stats))),
      c('answer', 'question', 'value')
    )
  }

  # Collect all pieces first and bind once instead of growing inside a loop.
  # The empty frame fixes the column layout when there are no questions.
  empty <- data.frame(answer = factor(), question = factor(), value = double())
  pieces <- lapply(seq_len(n.questions), summarise.question)
  result <- do.call(rbind, c(list(empty), pieces))

  result <- result[, c('question', 'answer', 'value')]

  # A length-one filter cannot be recycled against zero rows, so use an empty
  # vector of the same type in that case.
  filter.column <- if (nrow(result) > 0L) filter else filter[0L]
  cbind(filter = filter.column, result)
}
# create the test data frame
# Ten rows of example responses; NA is a missing value.
Id <- as.numeric(1:10)
ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
R <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
S <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
W <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)
df <- data.frame(Id = Id, ClassA = ClassA, ClassB = ClassB, R = R, S = S, W = W)

# Allowed answers for each question; every set ends with NA.
ClassAAnswers <- c(1:5, NA)
ClassBAnswers <- c(1:5, NA)
RAnswers <- c(0:10, NA)
SAnswers <- c(0:20, NA)
WAnswers <- c(0:30, NA)

# Answer sets listed in the same order as the columns of RSW.df, because
# extractSummaryDataframe pairs them with the columns by position.
answers.list <- list(RAnswers, SAnswers, WAnswers)
RSW.df <- df[c('R', 'S', 'W')]
# create the result
result <- extractSummaryDataframe(RSW.df, answers.list, filter = 'None')
# return the result
result
返回
filter question answer value
1 None R 0 0.0000000
2 None R 1 1.0000000
3 None R 2 2.0000000
4 None R 3 1.0000000
5 None R 4 1.0000000
6 None R 5 1.0000000
7 None R 6 1.0000000
8 None R 7 1.0000000
9 None R 8 0.0000000
10 None R 9 1.0000000
11 None R 10 0.0000000
12 None R <NA> 1.0000000
13 None R sum 39.0000000
14 None R length 10.0000000
15 None R mean 4.3333333
16 None R standard.deviation 2.6457513
17 None R var 7.0000000
18 None R median 4.0000000
19 None R min 1.0000000
20 None R max 9.0000000
21 None R quantile.0% 1.0000000
22 None R quantile.25% 2.0000000
23 None R quantile.50% 4.0000000
24 None R quantile.75% 6.0000000
25 None R quantile.100% 9.0000000
26 None R skew 0.3275692
27 None R kurtosis -1.5333333
28 None S 0 0.0000000
29 None S 1 0.0000000
30 None S 2 0.0000000
31 None S 3 1.0000000
32 None S 4 0.0000000
33 None S 5 1.0000000
34 None S 6 1.0000000
35 None S 7 3.0000000
36 None S 8 1.0000000
37 None S 9 1.0000000
38 None S 10 0.0000000
39 None S 11 0.0000000
40 None S 12 0.0000000
41 None S 13 0.0000000
42 None S 14 0.0000000
43 None S 15 0.0000000
44 None S 16 0.0000000
45 None S 17 0.0000000
46 None S 18 0.0000000
47 None S 19 0.0000000
48 None S 20 0.0000000
49 None S <NA> 2.0000000
50 None S sum 52.0000000
51 None S length 10.0000000
52 None S mean 6.5000000
53 None S standard.deviation 1.8516402
54 None S var 3.4285714
55 None S median 7.0000000
56 None S min 3.0000000
57 None S max 9.0000000
58 None S quantile.0% 3.0000000
59 None S quantile.25% 5.7500000
60 None S quantile.50% 7.0000000
61 None S quantile.75% 7.2500000
62 None S quantile.100% 9.0000000
63 None S skew -0.4252986
64 None S kurtosis -1.3028646
65 None W 0 0.0000000
66 None W 1 0.0000000
67 None W 2 1.0000000
68 None W 3 0.0000000
69 None W 4 2.0000000
70 None W 5 2.0000000
71 None W 6 2.0000000
72 None W 7 2.0000000
73 None W 8 1.0000000
74 None W 9 0.0000000
75 None W 10 0.0000000
76 None W 11 0.0000000
77 None W 12 0.0000000
78 None W 13 0.0000000
79 None W 14 0.0000000
80 None W 15 0.0000000
81 None W 16 0.0000000
82 None W 17 0.0000000
83 None W 18 0.0000000
84 None W 19 0.0000000
85 None W 20 0.0000000
86 None W 21 0.0000000
87 None W 22 0.0000000
88 None W 23 0.0000000
89 None W 24 0.0000000
90 None W 25 0.0000000
91 None W 26 0.0000000
92 None W 27 0.0000000
93 None W 28 0.0000000
94 None W 29 0.0000000
95 None W 30 0.0000000
96 None W <NA> 0.0000000
97 None W sum 54.0000000
98 None W length 10.0000000
99 None W mean 5.4000000
100 None W standard.deviation 1.7763883
101 None W var 3.1555556
102 None W median 5.5000000
103 None W min 2.0000000
104 None W max 8.0000000
105 None W quantile.0% 2.0000000
106 None W quantile.25% 4.2500000
107 None W quantile.50% 5.5000000
108 None W quantile.75% 6.7500000
109 None W quantile.100% 8.0000000
110 None W skew -0.3339582
111 None W kurtosis -0.9871315
这正是我所寻找的 :-)。