我有下表,想对每一对通道汇总访问次数,即:这对通道同时出现的所有路径的访问次数之和。
输入:
Channel PathNum visits
1 C1 1 5
2 C2 1 5
3 C3 1 5
4 C1 2 3
5 C2 2 3
6 C1 3 1
7 C4 4 4
8 C5 5 13
9 C6 5 13
10 C6 6 7
11 C6 6 7
输出:
Channel1 Channel2 visits
1 C1 C1 9
2 C1 C2 8
3 C1 C3 5
4 C1 C4 0
5 C1 C5 0
6 C1 C6 0
7 C2 C2 8
8 C2 C3 5
9 C2 C4 0
10 C2 C5 0
11 C2 C6 0
12 C3 C3 5
13 C3 C4 0
14 C3 C5 0
15 C3 C6 0
16 C4 C4 4
17 C4 C5 0
18 C4 C6 0
19 C5 C5 13
20 C5 C6 13
21 C6 C6 20
以下是使用for循环执行上述示例的一些R代码:
# Example data: one row per (Channel, PathNum) occurrence; `visits` is the
# visit count recorded on that row.
df1 <- data.frame(
  Channel = c("C1", "C2", "C3", "C1", "C2", "C1", "C4", "C5", "C6", "C6", "C6"),
  PathNum = c(1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6),
  visits = c(5, 5, 5, 3, 3, 1, 4, 13, 13, 7, 7),
  stringsAsFactors = FALSE
)

# Path-by-channel matrix of mean visits (duplicate rows for the same
# channel/path collapse to their mean). Built with base R so the snippet does
# not rely on `dcast()`, which was never loaded. Cells for channels that do
# not occur on a path are NA.
visit_matrix <- tapply(df1$visits, list(df1$PathNum, df1$Channel), mean)

# Wide data-frame form of the same table, kept under its original name.
df2 <- data.frame(
  PathNum = as.numeric(rownames(visit_matrix)),
  visit_matrix,
  check.names = FALSE,
  row.names = NULL
)

# Channels in order of first appearance; computed once instead of per
# iteration.
channels <- unique(df1$Channel)
n_channels <- length(channels)

# Index pairs (i, j) with i <= j, in the same order the nested loops produced:
# i is repeated for every j in i..n.
first <- rep(seq_len(n_channels), times = rev(seq_len(n_channels)))
second <- unlist(lapply(seq_len(n_channels), function(i) i:n_channels))

Channel1 <- channels[first]
Channel2 <- channels[second]

# For each pair, add up the first channel's visits over the paths where both
# channels are present and the second channel's visits are positive.
vis <- vapply(seq_along(first), function(k) {
  a <- visit_matrix[, Channel1[k]]
  b <- visit_matrix[, Channel2[k]]
  sum(a[!is.na(a) & !is.na(b) & b > 0])
}, numeric(1))

outframe <- data.frame(Channel1 = Channel1, Channel2 = Channel2, visits = vis)
这段代码对这个小例子有效,但我想知道能否用 dplyr 或 SQLite 以更简洁的方式实现。
答案 0 :(得分:2)
这是一种方式:
# library() fails loudly if a package is missing; require() would only
# return FALSE and let the script break later with a confusing error.
library(data.table)
library(gtools)

DT <- data.table(df1)

# All unordered channel pairs (including a channel paired with itself),
# enumerated in order of first appearance and then mapped from indices to
# channel names.
uC <- unique(DT$Channel)
cn <- combinations(length(uC), 2, repeats.allowed = TRUE)
cn[] <- uC[cn]

# For every path, list the channel pairs that occur together on it.
# Duplicate rows are dropped first so a repeated channel is counted once.
# Channels are ordered by their position in `uC` before pairing, so each
# pair comes out in the same (c1, c2) orientation as the lookup table `cn`;
# without this, a path listing channels in a different order would produce
# a reversed pair that never matches in the join below.
# `visits[1]` takes the first visits value in the group as the path's count.
sharedPaths <- unique(DT)[, {
  ch <- uC[sort(match(Channel, uC))]
  idx <- combinations(.N, 2, repeats.allowed = TRUE)
  list(v = visits[1], c1 = ch[idx[, 1]], c2 = ch[idx[, 2]])
}, by = PathNum]

# Join every possible pair against the per-path pairs and sum the visits;
# pairs that never co-occur have no match (v is NA) and therefore sum to 0.
setkey(sharedPaths, c1, c2)
sharedPaths[J(as.data.table(cn)), sum(v, na.rm = TRUE), by = .EACHI]