我有一张如下所示的表:
> head(test,10)
# A tibble: 10 x 16
Question_1 Question_2 Question_3 Question_4 Question_5 Question_6 Question_7 Question_8 Question_9
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 B C C E C A C E C
2 C C C B C A E D C
3 B C C E C A C E C
4 C C C D C A C D C
5 B B C B A A A D C
6 C C C E BLANK A C E C
7 C C C E C A E E C
8 B C C E C A C D C
9 C C C E C A C D C
10 D C E B C A A D C
我想对它进行转置,使每个问题占一行,
并在 A、B、C、D、E、BLANK 这 6 个单独的列中给出各答案的计数。
答案 0 :(得分:4)
我们可以先用 gather 将数据转换为“长”格式,再用 count 统计 key、value
两列各组合出现的次数,最后用 spread 将结果转换为“宽”格式:
library(tidyverse)
# Reshape to one row per (question, answer) pair, count each pair, then
# spread the answers back out into one column per answer value.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
test %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  count(key, value) %>%
  # names_sort = TRUE keeps the answer columns in sorted order, as spread()
  # did; values_fill = 0 reports answers a question never received as 0.
  pivot_wider(
    names_from = value, values_from = n,
    values_fill = 0, names_sort = TRUE
  )
或使用melt/dcast
library(data.table)
# Melt every Question_* column into long (variable, value) pairs, then cast
# back to wide: one row per question, one column per answer value.
# NOTE: setDT() converts `test` to a data.table by reference.
dcast(
  # spell out measure.vars instead of relying on partial argument matching
  melt(setDT(test), measure.vars = patterns("^Question")),
  variable ~ value,
  # count rows explicitly rather than relying on dcast()'s implicit
  # "aggregate function missing, defaulting to length" fallback
  fun.aggregate = length,
  value.var = "value"
)
或者在 base R 中,无需循环:按列位置复制 'test' 的列名,
用 unlist 将 'test' 展开为向量,再用 table 得到列联表:
# Cross-tabulate question name against answer in a single table() call:
# each column name is repeated once per row so that it lines up with the
# column-by-column vector of answers produced by unlist().
table(
  rep(names(test), each = nrow(test)),
  unlist(test, use.names = FALSE)
)
# A B BLANK C D E
# Question_1 0 4 0 5 1 0
# Question_2 0 1 0 9 0 0
# Question_3 0 0 0 9 0 1
# Question_4 0 3 0 0 1 6
# Question_5 1 0 1 8 0 0
# Question_6 10 0 0 0 0 0
# Question_7 2 0 0 6 0 2
# Question_8 0 0 0 0 6 4
# Question_9 0 0 0 10 0 0
注意:无需使用循环。
# Benchmark: repeat the rows of `test` 1e5 times to build a larger input
# (`df2`), then time each approach with system.time(). The commented numbers
# after each call are the recorded user / system / elapsed timings.
df2 <- test[rep(seq_len(nrow(test)), 1e5), ]
# Approach 1: tabulate each column against a shared set of levels via sapply().
system.time({
vals <- unique(unlist(df2))
t(sapply(df2, function(x) table(factor(x, levels = vals))))
})
# user system elapsed
# 6.987 0.367 7.293
# Approach 2: one table() call on replicated column names vs. unlisted values.
system.time({
table(names(df2)[col(df2)], unlist(df2))
})
# user system elapsed
# 6.355 0.407 6.720
# Approach 3: tidyverse gather() -> count() -> spread().
system.time({
gather(df2) %>%
count(key, value) %>%
spread(value, n, fill = 0)
})
# user system elapsed
# 0.567 0.125 0.695
# Approach 4: data.table melt() -> dcast().
system.time({
dcast(melt(setDT(df2), measure = patterns("^Question")), variable ~ value)
})
# user system elapsed
# 0.789 0.018 0.195
# NOTE(review): user time exceeds elapsed time here, presumably because
# data.table ran multi-threaded — confirm before comparing against the others.
# Reproducible copy of the example survey data: 10 respondents (rows) by
# 9 questions (columns), every answer stored as a character string.
test <- structure(
  list(
    Question_1 = c("B", "C", "B", "C", "B", "C", "C", "B", "C", "D"),
    Question_2 = c("C", "C", "C", "C", "B", "C", "C", "C", "C", "C"),
    Question_3 = c(rep("C", 9), "E"),
    Question_4 = c("E", "B", "E", "D", "B", "E", "E", "E", "E", "B"),
    Question_5 = c("C", "C", "C", "C", "A", "BLANK", "C", "C", "C", "C"),
    Question_6 = rep("A", 10),
    Question_7 = c("C", "E", "C", "C", "A", "C", "E", "C", "C", "A"),
    Question_8 = c("E", "D", "E", "D", "D", "E", "E", "D", "D", "D"),
    Question_9 = rep("C", 10)
  ),
  class = "data.frame",
  # row names are kept as the character strings "1" .. "10"
  row.names = as.character(1:10)
)
答案 1 :(得分:3)
一种 base R 的做法是:先取出数据框中所有的唯一值,
再对每一列(例如用 sapply)统计每个值出现的频数。
# Count how often each answer occurs in every column of `test`.
# `vals` holds every distinct answer in order of first appearance, so all
# columns are tabulated against the same set of levels (absent answers -> 0).
vals <- unique(unlist(test))
# Row-bind the per-column tables instead of using t(sapply(...)): sapply()
# collapses to a plain vector when there is only one distinct answer, and
# t() of that vector would put the questions in columns rather than rows.
do.call(rbind, lapply(test, function(x) table(factor(x, levels = vals))))
# B C D E A BLANK
#Question_1 4 5 1 0 0 0
#Question_2 1 9 0 0 0 0
#Question_3 0 9 0 1 0 0
#Question_4 3 0 1 6 0 0
#Question_5 0 8 0 0 1 1
#Question_6 0 0 0 0 10 0
#Question_7 0 6 0 2 2 0
#Question_8 0 0 6 4 0 0
#Question_9 0 10 0 0 0 0