我有一个看起来像这样的data.frame:
C1 C2 C3 C4
1 -1 -1 1
1 1 -1 1
1 1 -1 1
1 -1 1 -1
我想按列统计 -1 和 1 各自出现的次数,所以我使用了:
# Tabulate the values of every column (MARGIN = 2) of DF.
tab <- apply(DF, MARGIN = 2, FUN = table)
之后我使用了下面这行代码:
# Column-bind the per-column tables, then convert the result to a data.frame.
final <- as.data.frame(do.call(what = "cbind", args = tab))
将结果写为data.frame。不幸的是,由于第一个元素,它给了我一个错误:
tab[[1]]
1
4
tab[[2]]
-1 1
2 2
..........
所以我想在 tab[[1]] 中为 -1 补上频数 0,这样才能把结果写成 data.frame。
有人可以帮我吗?
祝好,
F.
答案 0 :(得分:4)
第三种方式:
# Example data from the question: four columns holding only 1 and -1.
x <- read.table(text = "C1 C2 C3 C4
1 -1 -1 1
1 1 -1 1
1 1 -1 1
1 -1 1 -1 ", header = TRUE)

# Turn each column into a factor with both levels declared, so table() reports
# a count (possibly 0) for 1 and for -1 in every column; sapply() then
# simplifies the equally sized tables into one matrix.
sapply(x, function(col) table(factor(col, levels = c(1, -1))))
C1 C2 C3 C4
1 4 2 1 3
-1 0 2 3 1
一些基准测试:
# Benchmark input: 1e7 random draws from {-1, 1}, laid out in 100 columns.
xx <- as.data.frame(
  matrix(
    sample(c(-1, 1), size = 1e7, replace = TRUE),
    ncol = 100
  )
)
# Count values per column by stacking to long form and reshaping back.
#
# DF: a data.frame whose columns are tabulated.
# Returns a wide data.frame with a `values` column and one `Freq.<column>`
# column for each column of DF.
Roland <- function(DF) {
  # stack() -> (values, ind); table() cross-tabulates them; as.data.frame()
  # flattens the contingency table into values / ind / Freq rows.
  long_counts <- as.data.frame(table(stack(DF)))
  reshape(long_counts, direction = "wide", idvar = "values", timevar = "ind")
}
# Count 1 and -1 per column via factors with fixed levels.
#
# x: a data.frame (or list) of vectors to tabulate.
# Returns a matrix with rows "1" and "-1" and one column per element of x.
Roman <- function(x) {
  # Declaring both levels up front makes table() emit a 0 for a value that
  # never occurs, so every column yields a table of the same length.
  sapply(x, function(col) table(factor(col, levels = c(1, -1))))
}
# Count 1 and -1 per column with vectorised comparisons.
#
# x: a data.frame or matrix of values.
# Returns a two-row matrix: row 1 holds the number of 1s per column, row 2 the
# number of -1s. The rows carry no names.
user20650 <- function(x) {
  n_pos <- colSums(x == 1)
  n_neg <- colSums(x == -1)
  # deparse.level = 0 stops rbind() from labelling the rows with the local
  # variable names.
  rbind(n_pos, n_neg, deparse.level = 0)
}
# library() stops with an error when the package is not installed; require()
# would only warn and return FALSE, deferring the failure to the next call.
library(microbenchmark)
# Time the three approaches on the same input, two runs each. The expressions
# are left exactly as written so they match the labels in the printed results.
microbenchmark(m1 <- Roland(xx), m2 <- Roman(xx), m3 <- user20650(xx), times = 2)
Unit: milliseconds
expr min lq median uq max neval
m1 <- Roland(xx) 17624.6297 17624.6297 18116.6595 18608.6893 18608.6893 2
m2 <- Roman(xx) 13838.2030 13838.2030 14301.9159 14765.6288 14765.6288 2
m3 <- user20650(xx) 786.3689 786.3689 788.7253 791.0818 791.0818 2
答案 1 :(得分:2)
# Rebuild the example data from the question.
DF <- read.table(text = "C1 C2 C3 C4
1 -1 -1 1
1 1 -1 1
1 1 -1 1
1 -1 1 -1 ", header = TRUE)

# stack() puts all columns into long form (values, ind); table() then
# cross-tabulates value against column, so a missing value shows up as 0.
res <- table(stack(DF))
# ind
# values C1 C2 C3 C4
# -1 0 2 3 1
# 1 4 2 1 3

# Flatten the contingency table into one row per (value, column) pair.
res2 <- as.data.frame(res)
# values ind Freq
# 1 -1 C1 0
# 2 1 C1 4
# 3 -1 C2 2
# 4 1 C2 2
# 5 -1 C3 3
# 6 1 C3 1
# 7 -1 C4 1
# 8 1 C4 3

# Spread the counts back out: one row per value, one Freq.* column per column.
reshape(res2, direction = "wide", idvar = "values", timevar = "ind")
# values Freq.C1 Freq.C2 Freq.C3 Freq.C4
# 1 -1 0 2 3 1
# 2 1 4 2 1 3
另一种选择是使用 res <- ftable(stack(DF)),其结果可以用 write.ftable 直接写入文件。