按三个列的组合统计出现频率

时间:2017-07-27 10:19:29

标签: r

我有一个这样的数据框:

# Example input: 17 observations described by three factor columns.
# Each column is built from its integer codes plus the ordered level labels,
# so the level order (and therefore the sort order of results) is explicit.
# Note: the "comapny..." spellings are part of the data and are intentional.
df <- data.frame(
  col1 = factor(
    c(1L, 1L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 4L),
    levels = 1:4,
    labels = c("stock1", "stock2", "stock3", "stock4")
  ),
  col2 = factor(
    c(4L, 5L, 7L, 6L, 5L, 5L, 5L, 6L, 6L, 8L, 8L, 4L, 3L, 3L, 1L, 2L, 3L),
    levels = 1:8,
    labels = c(
      "comapny1", "comapny1+comapny4", "comapny4", "company1",
      "company2", "company2+company1", "company3", "company4"
    )
  ),
  # The first 12 rows belong to predictor1, the remaining 5 to predictor2.
  col3 = factor(
    rep(c(1L, 2L), times = c(12L, 5L)),
    levels = 1:2,
    labels = c("predictor1", "predictor2")
  )
)

我想统计这三列的每一种取值组合在数据中出现的次数(频率)。

预期输出

# Expected result: one row per distinct (col1, col2, col3) combination that
# occurs in `df`, with `frequency` holding the number of matching rows
# (the 12 frequencies sum to the 17 rows of `df`).
# The col2 labels are spelled exactly as in `df` ("comapny...") so that the
# expected output can actually be reproduced from the input data.
df2 <- structure(list(col1 = structure(c(1L, 1L, 1L, 2L, 4L, 1L, 1L, 
3L, 3L, 1L, 2L, 1L), .Label = c("stock1", "stock2", "stock3", 
"stock4"), class = "factor"), col2 = structure(c(1L, 2L, 3L, 
3L, 3L, 4L, 5L, 5L, 6L, 6L, 7L, 8L), .Label = c("comapny1", 
"comapny1+comapny4", "comapny4", "company1", "company2", "company2+company1", 
"company3", "company4"), class = "factor"), col3 = structure(c(2L, 
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("predictor1", 
"predictor2"), class = "factor"), frequency = c(1L, 1L, 1L, 1L, 
1L, 2L, 3L, 1L, 2L, 1L, 1L, 2L)), .Names = c("col1", "col2", 
"col3", "frequency"), class = "data.frame", row.names = c(NA, 
-12L))

该如何实现呢?

1 个答案:

答案 0 :(得分:2)

我们可以使用 dplyr 包中的 `count()` 函数:

library(dplyr)

# Count how many rows share each (col1, col2, col3) combination.
# `name = "frequency"` names the count column as requested in the question
# (the default name would be `n`). Rows come back ordered by the factor
# levels of col1, then col2, then col3.
count(df, col1, col2, col3, name = "frequency")
#      col1              col2       col3 frequency
# 1  stock1          comapny1 predictor2         1
# 2  stock1 comapny1+comapny4 predictor2         1
# 3  stock1          comapny4 predictor2         1
# 4  stock1          company1 predictor1         2
# 5  stock1          company2 predictor1         3
# 6  stock1 company2+company1 predictor1         1
# 7  stock1          company4 predictor1         2
# 8  stock2          comapny4 predictor2         1
# 9  stock2          company3 predictor1         1
# 10 stock3          company2 predictor1         1
# 11 stock3 company2+company1 predictor1         2
# 12 stock4          comapny4 predictor2         1

或者使用 data.table:

library(data.table)

# Count rows per (col1, col2, col3) combination; `.N` is the group size.
# as.data.table() works on a copy, so `df` itself stays a plain data.frame
# (setDT() would convert it to a data.table in place).
# Groups are returned in order of first appearance; use `keyby =` instead of
# `by =` if the result should be sorted by the grouping columns.
as.data.table(df)[, .(frequency = .N), by = .(col1, col2, col3)]