我有一个像这样的数据表(data.table):
# Load data.table. library() stops with an error when the package is missing,
# whereas require() only warns and returns FALSE, deferring the failure to the
# first data.table call.
library(data.table)

# Example table: three character columns containing repeated values.
dt <- data.table(
  a = c("a", "a", "b", "b", "b"),
  b = c("a", "a", "c", "c", "e"),
  c = c("d", "d", "b", "b", "b")
)
我想计算每一列中各个值出现的频率(即每一行的值在该列中出现的次数)。我知道怎样逐列计算,但我想用一条指令完成,因为我的数据有很多列。
期望的结果必须与逐列执行下面这些语句得到的结果相同:
# Column-by-column frequencies: each statement adds, by reference, a column
# holding the size of the group that the row belongs to.
dt[, a1 := .N, by = .(a)]
dt[, a2 := .N, by = .(b)]
dt[, a3 := .N, by = .(c)]
答案 0 :(得分:-1)
# Load data.table. library() stops with an error when the package is missing,
# whereas require() only warns and returns FALSE, deferring the failure to the
# first data.table call.
library(data.table)

# Example table: three character columns containing repeated values.
dt <- data.table(
  a = c("a", "a", "b", "b", "b"),
  b = c("a", "a", "c", "c", "e"),
  c = c("d", "d", "b", "b", "b")
)
# dt
#    a b c
# 1: a a d
# 2: a a d
# 3: b c b
# 4: b c b
# 5: b e b
# For every column of dt, build a two-column data.table: the column itself
# plus `x`, the number of rows that share the row's value in that column.
# The result is an unnamed list with one data.table per column of dt.
l <- lapply(names(dt), function(col_name) {
  # Selecting with `with = FALSE` returns a new one-column data.table, so the
  # `:=` below adds `x` to that copy and leaves dt itself untouched.
  # `by = c(col_name)` passes the grouping column as a character vector.
  dt[, col_name, with = FALSE][, x := .N, by = c(col_name)]
})
# l
# [[1]]
#    a x
# 1: a 2
# 2: a 2
# 3: b 3
# 4: b 3
# 5: b 3
#
# [[2]]
#    b x
# 1: a 2
# 2: a 2
# 3: c 2
# 4: c 2
# 5: e 1
#
# [[3]]
#    c x
# 1: d 2
# 2: d 2
# 3: b 3
# 4: b 3
# 5: b 3
# Bind the per-column tables side by side. Each element of `l` contributes a
# (value, count) pair, so value columns sit at odd positions and count columns
# at even positions.
df <- as.data.frame(l)

# Rename every count column to "<value column>_count". A parity mask is used
# instead of seq(2, n, 2) because seq() errors when there are no columns.
is_count <- seq_len(ncol(df)) %% 2L == 0L
names(df)[is_count] <- paste0(names(df)[!is_count], "_count")
# df
#   a a_count b b_count c c_count
# 1 a       2 a       2 d       2
# 2 a       2 a       2 d       2
# 3 b       3 c       2 b       3
# 4 b       3 c       2 b       3
# 5 b       3 e       1 b       3