如何对某一列取值的所有两两组合进行汇总(类似 melt 重塑)?

时间:2015-05-14 12:53:48

标签: sql r sqlite dplyr

我有下表,想对每一对通道汇总访问次数,即:这两个通道同时出现的所有路径的访问次数之和。

输入:

   Channel PathNum visits
1       C1       1      5
2       C2       1      5
3       C3       1      5
4       C1       2      3
5       C2       2      3
6       C1       3      1
7       C4       4      4
8       C5       5     13
9       C6       5     13
10      C6       6      7
11      C6       6      7

输出:

   Channel1 Channel2 visits
1        C1       C1      9
2        C1       C2      8
3        C1       C3      5
4        C1       C4      0
5        C1       C5      0
6        C1       C6      0
7        C2       C2      8
8        C2       C3      5
9        C2       C4      0
10       C2       C5      0
11       C2       C6      0
12       C3       C3      5
13       C3       C4      0
14       C3       C5      0
15       C3       C6      0
16       C4       C4      4
17       C4       C5      0
18       C4       C6      0
19       C5       C5     13
20       C5       C6     13
21       C6       C6     20

以下是使用for循环执行上述示例的一些R代码:

# Sample input: one row per (Channel, PathNum) occurrence. In this data every
# row of a given path carries the same `visits` value.
df1 <- data.frame(
  Channel = c("C1", "C2", "C3", "C1", "C2", "C1", "C4", "C5", "C6", "C6", "C6"),
  PathNum = c(1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6),
  visits = c(5, 5, 5, 3, 3, 1, 4, 13, 13, 7, 7),
  stringsAsFactors = FALSE
)

# Wide table: one row per path, one column per channel, each cell holding the
# mean `visits` of that (path, channel) combination. Averaging collapses
# duplicated rows such as the two (C6, 6) entries. Combinations that never
# occur are NA. Built with base `tapply()` so no reshaping package is needed.
path_by_channel <- tapply(df1$visits, list(df1$PathNum, df1$Channel), mean)
df2 <- data.frame(
  PathNum = sort(unique(df1$PathNum)),  # same order as the tapply() rows
  path_by_channel,
  check.names = FALSE,
  row.names = NULL
)

# Channels in order of first appearance; computed once instead of per iteration.
channels <- unique(df1$Channel)
n_channels <- length(channels)

# Index pairs (i, j) with i <= j, laid out in the order the nested loops
# `for i ... for j in i:n` would visit them.
first_idx <- rep(seq_len(n_channels), times = rev(seq_len(n_channels)))
second_idx <- unlist(lapply(seq_len(n_channels), function(i) seq(i, n_channels)))

Channel1 <- channels[first_idx]
Channel2 <- channels[second_idx]

# For one pair, add up the first channel's visits over the paths where both
# channels are present and the second channel's value is positive.
vis <- vapply(
  seq_along(first_idx),
  function(k) {
    a <- df2[[Channel1[k]]]
    b <- df2[[Channel2[k]]]
    together <- !is.na(a) & !is.na(b) & b > 0
    sum(a[together])
  },
  numeric(1)
)

outframe <- data.frame(
  Channel1 = Channel1,
  Channel2 = Channel2,
  visits = vis,
  stringsAsFactors = FALSE
)

这段代码对这个小例子可行,但我想知道能否用 dplyr 或 SQLite 以更好的方式实现。

1 个答案:

答案 0 :(得分:2)

这是一种方式:

# library() stops immediately if a package is missing, whereas require() only
# returns FALSE and lets the script fail later with a less helpful error.
library(data.table)
library(gtools)
DT <- data.table(df1)

# Lookup table of every unordered channel pair (self-pairs included).
# combinations() returns index pairs (i, j) with i <= j; the indices are then
# replaced in place by channel names, which turns `cn` into a character matrix.
uC   <- unique(DT$Channel)
cn   <- combinations(length(uC), 2, repeats.allowed = TRUE)
cn[] <- uC[cn]

# Per path, list every channel pair that occurs together, tagged with the
# path's visit count `v` (taken from the path's first row).
# unique(DT) drops exact duplicate rows first.
sharedPaths <- unique(DT)[, {
  # Order this path's channels by their position in `uC` (and drop repeats) so
  # each emitted (c1, c2) has the same orientation as the rows of `cn`.
  # Without this, a path that lists its channels in a different order than
  # their first appearance yields a reversed pair that the join below never
  # matches, silently losing that path's visits.
  ch  <- uC[sort(unique(match(Channel, uC)))]
  idx <- combinations(length(ch), 2, repeats.allowed = TRUE)
  list(v = visits[1], c1 = ch[idx[, 1]], c2 = ch[idx[, 2]])
}, by = PathNum]

# Keyed join against the full pair list, summing visits per pair. Pairs that
# never share a path match no row, so `v` is NA there and na.rm = TRUE turns
# the sum into 0.
setkey(sharedPaths, c1, c2)
sharedPaths[J(as.data.table(cn)), sum(v, na.rm = TRUE), by = .EACHI]