我在内存中有一个 data.table 表。它包含许多行,并具有以下这些列:
key, c1.min, c2.min, c3.min, c1.max, c2.max, c3.max, c1.sd, c2.sd, c3.sd
我想返回一个新表
key, c1, c2, c3
,其中
c1 = c1.min + c1.max + c1.sd
c2 = c2.min + c2.max + c2.sd
c3 = c3.min + c3.max + c3.sd
答案 0 :(得分:1)
试试这个例子:
library(data.table)
# Example input: a `key` column plus min / max / sd columns for c1, c2 and c3,
# ten rows each.
# NOTE(review): the columns are built in a data.frame() first; this looks
# required because data.table() has its own `key` argument, so a column named
# `key` cannot be passed to it directly -- confirm before unwrapping.
myData <- data.table(
  data.frame(
    key    = seq_len(10L),
    c1.min = seq_len(10L),
    c2.min = seq(10L, 19L),
    c3.min = seq(100L, 109L),
    c1.max = seq_len(10L),
    c2.max = seq_len(10L),
    c3.max = seq_len(10L),
    c1.sd  = seq_len(10L),
    c2.sd  = seq_len(10L),
    c3.sd  = seq_len(10L)
  )
)
# Sum, for each prefix, every column whose name starts with "<prefix>.".
# Returns a matrix with columns key, c1, c2, c3 (one row per row of myData).
local({
  prefixes <- c("c1", "c2", "c3")
  # Column names do not change between prefixes, so look them up once.
  col_names <- colnames(myData)
  totals <- lapply(prefixes, function(prefix) {
    # Match on "<prefix>." at the start of the name so that "c1" cannot also
    # pick up a column such as "c10.min".
    wanted <- startsWith(col_names, paste0(prefix, "."))
    rowSums(myData[, wanted, with = FALSE])
  })
  names(totals) <- prefixes
  # Binding a list of columns keeps the result a matrix even when myData has
  # a single row (sapply() would collapse that case to a plain vector).
  do.call(cbind, c(list(key = myData$key), totals))
})
# Spell each total out by hand: one output column per prefix, alongside key.
myData[
  ,
  .(
    key,
    c1 = c1.min + c1.max + c1.sd,
    c2 = c2.min + c2.max + c2.sd,
    c3 = c3.min + c3.max + c3.sd
  )
]
答案 1 :(得分:1)
这是使用 melt 的另一种略有不同的做法。我们在 measure 参数中用 patterns 指定列名模式,将数据转换为 'long'(长)格式,然后按 'key' 分组并指定 .SDcols,对这些列求 sum。
# Reshape to long form with one value column per prefix, then sum those value
# columns within each key.
# The patterns end in "\\." so that "^c1" cannot also match a column such as
# "c10.min"; `measure.vars` and `by` are written out in full instead of relying
# on partial / positional argument matching.
melt(
  myData,
  measure.vars = patterns("^c1\\.", "^c2\\.", "^c3\\."),
  value.name = c("c1", "c2", "c3")
)[, lapply(.SD, sum), by = key, .SDcols = c("c1", "c2", "c3")]
# key c1 c2 c3
# 1: 1 3 12 102
# 2: 2 6 15 105
# 3: 3 9 18 108
# 4: 4 12 21 111
# 5: 5 15 24 114
# 6: 6 18 27 117
# 7: 7 21 30 120
# 8: 8 24 33 123
# 9: 9 27 36 126
#10: 10 30 39 129
答案 2 :(得分:1)
另一种选择:
library(dplyr)
library(tidyr)
# Reshape every non-key column to long form, splitting names such as "c1.min"
# at the non-alphanumeric separator into a prefix `l` ("c1") and a statistic
# `s` ("min"); sum the values per key and prefix; then give each prefix its
# own column again.
# pivot_longer() / pivot_wider() replace the superseded gather() / spread().
# The splitting regex is the default that separate() used, and the result is
# still grouped by `key`, as it was before.
myData %>%
  pivot_longer(
    -key,
    names_to = c("l", "s"),
    names_sep = "[^[:alnum:]]+",
    values_to = "v"
  ) %>%
  group_by(key, l) %>%
  summarise(value = sum(v)) %>%
  pivot_wider(names_from = l, values_from = value)
给出了:
#Source: local data frame [10 x 4]
#Groups: key [10]
#
# key c1 c2 c3
#* <int> <int> <int> <int>
#1 1 3 12 102
#2 2 6 15 105
#3 3 9 18 108
#4 4 12 21 111
#5 5 15 24 114
#6 6 18 27 117
#7 7 21 30 120
#8 8 24 33 123
#9 9 27 36 126
#10 10 30 39 129
答案 3 :(得分:0)
您可以只使用 base 包(基础 R):
# Row-wise total of the min, max and sd columns for each prefix.
# rowSums() is vectorised, so it replaces the per-row apply(..., 1, sum) calls;
# note that it returns doubles.
myData$c1 <- rowSums(myData[, c("c1.min", "c1.max", "c1.sd")])
myData$c2 <- rowSums(myData[, c("c2.min", "c2.max", "c2.sd")])
myData$c3 <- rowSums(myData[, c("c3.min", "c3.max", "c3.sd")])
# Keep only the key and the totals. This overwrites myData, so the original
# *.min / *.max / *.sd columns are no longer available after this line.
myData <- myData[, c("key", "c1", "c2", "c3")]
print(myData)
key c1 c2 c3
1 1 3 12 102
2 2 6 15 105
3 3 9 18 108
4 4 12 21 111
5 5 15 24 114
6 6 18 27 117
7 7 21 30 120
8 8 24 33 123
9 9 27 36 126
10 10 30 39 129
或者,您可以定义一个按行求和的函数:
# Row-wise sum of a numeric matrix or data frame `x`.
# Returns a numeric vector with one total per row of `x`.
# Uses the vectorised rowSums() rather than calling sum() once per row through
# apply(x, 1, sum).
abc <- function(x) {
  rowSums(x)
}
# Per-prefix row totals of the min, max and sd columns.
c1 <- abc(myData[, c("c1.min", "c1.max", "c1.sd")])
c2 <- abc(myData[, c("c2.min", "c2.max", "c2.sd")])
c3 <- abc(myData[, c("c3.min", "c3.max", "c3.sd")])
# Build the result with data.frame() directly: going through cbind() first
# creates a matrix, which forces every column to one common type.
mydata1 <- data.frame(Key = myData$key, c1 = c1, c2 = c2, c3 = c3)
> mydata1
Key c1 c2 c3
1 1 3 12 102
2 2 6 15 105
3 3 9 18 108
4 4 12 21 111
5 5 15 24 114
6 6 18 27 117
7 7 21 30 120
8 8 24 33 123
9 9 27 36 126
10 10 30 39 129