R - 对具有特定前缀的列求和

时间:2016-06-06 09:44:13

标签: r data.table

我在内存中有一个data.table表。它由许多行组成,包含这些列

key, c1.min, c2.min, c3.min, c1.max, c2.max, c3.max, c1.sd, c2.sd, c3.sd

我想返回一个新表

key, c1, c2, c3

其中:

c1 = c1.min + c1.max + c1.sd
c2 = c2.min + c2.max + c2.sd
c3 = c3.min + c3.max + c3.sd

4 个答案:

答案 0 :(得分:1)

试试这个例子:

library(data.table)

# Dummy data: a `key` column plus .min/.max/.sd columns for c1, c2 and c3.
# The columns are assembled in a data.frame first and then converted, because
# data.table() has its own `key` argument and would not treat `key = ...` as
# a column.
myData <- data.table(data.frame(
  key    = seq_len(10L),
  c1.min = seq_len(10L),
  c2.min = 10:19,
  c3.min = 100:109,
  c1.max = seq_len(10L),
  c2.max = seq_len(10L),
  c3.max = seq_len(10L),
  c1.sd  = seq_len(10L),
  c2.sd  = seq_len(10L),
  c3.sd  = seq_len(10L)
))

# using basic regex match
# For each prefix, sum across the columns whose names start with "<prefix>.".
# The result is a matrix with one `key` column and one column per prefix.
cbind(
  key = myData$key,
  vapply(
    c("c1", "c2", "c3"),
    function(prefix, col_names) {
      # Anchor the pattern at the start and require the trailing dot, so that
      # "c1" cannot also match names such as "c10.min" or "abc1.max".
      is_match <- grepl(paste0("^", prefix, "\\."), col_names)
      rowSums(myData[, is_match, with = FALSE])
    },
    FUN.VALUE = numeric(nrow(myData)),
    # Column names do not change between prefixes, so look them up once.
    col_names = colnames(myData)
  )
)

# using manual sum
# Each total is written out explicitly; .() is data.table's alias for list().
myData[, .(
  key,
  c1 = c1.min + c1.max + c1.sd,
  c2 = c2.min + c2.max + c2.sd,
  c3 = c3.min + c3.max + c3.sd
)]

答案 1 :(得分:1)

这是一个使用 melt 的略有不同的做法。我们在 measure(即 measure.vars)参数中用 patterns 指定各组列,把数据转换为"长"格式,然后按 key 分组并指定 .SDcols,对这些列求和(sum)。

# Reshape to long format with one value column per prefix pattern, then sum
# those value columns within each key.
melt(
  myData,
  measure.vars = patterns("^c1", "^c2", "^c3"),
  value.name = c("c1", "c2", "c3")
)[, lapply(.SD, sum), by = key, .SDcols = c1:c3]
#    key c1 c2  c3
# 1:   1  3 12 102
# 2:   2  6 15 105
# 3:   3  9 18 108
# 4:   4 12 21 111
# 5:   5 15 24 114
# 6:   6 18 27 117
# 7:   7 21 30 120
# 8:   8 24 33 123
# 9:   9 27 36 126
#10:  10 30 39 129

答案 2 :(得分:1)

另一种选择:

library(dplyr)
library(tidyr)

myData %>%
  # Long format: every column except `key` becomes a (k, v) name/value pair.
  gather(key = k, value = v, -key) %>%
  # Split the old column name into two parts, `l` and `s`.
  separate(col = k, into = c("l", "s")) %>%
  # Total the values for each key / `l` combination.
  group_by(key, l) %>%
  summarise(value = sum(v)) %>%
  # Back to wide format: one column per distinct `l`.
  spread(key = l, value = value)

给出了:

#Source: local data frame [10 x 4]
#Groups: key [10]
#
#     key    c1    c2    c3
#*  <int> <int> <int> <int>
#1      1     3    12   102
#2      2     6    15   105
#3      3     9    18   108
#4      4    12    21   111
#5      5    15    24   114
#6      6    18    27   117
#7      7    21    30   120
#8      8    24    33   123
#9      9    27    36   126
#10    10    30    39   129

答案 3 :(得分:0)

您可以使用基础包

# Row-wise totals for each prefix. rowSums() does the summation in compiled
# code instead of calling sum() once per row as apply(x, 1, sum) does.
# Note: rowSums() returns doubles, even for integer input.
myData$c1 <- rowSums(myData[, c("c1.min", "c1.max", "c1.sd")])

myData$c2 <- rowSums(myData[, c("c2.min", "c2.max", "c2.sd")])

myData$c3 <- rowSums(myData[, c("c3.min", "c3.max", "c3.sd")])

# Keep only the key and the three totals.
myData <- myData[, c("key", "c1", "c2", "c3")]

print(myData)
   key c1 c2  c3
1    1  3 12 102
2    2  6 15 105
3    3  9 18 108
4    4 12 21 111
5    5 15 24 114
6    6 18 27 117
7    7 21 30 120
8    8 24 33 123
9    9 27 36 126
10  10 30 39 129

或者,您可以定义一个按行求和的函数:

# Row-wise sum: applies sum() over margin 1 (the rows) of `x`.
abc <- function(x) {
  apply(x, MARGIN = 1, FUN = sum)
}

# This alternative reads the .min/.max/.sd columns, so it must be run on the
# original myData, not on the reduced key/c1/c2/c3 table.
c1 <- abc(myData[, c("c1.min", "c1.max", "c1.sd")])
c2 <- abc(myData[, c("c2.min", "c2.max", "c2.sd")])
c3 <- abc(myData[, c("c3.min", "c3.max", "c3.sd")])

# Build the result with data.frame() so each column keeps its own type.
# Going through cbind() first would create a matrix and coerce every column
# to one common type (e.g. all character if the key is not numeric).
mydata1 <- data.frame(Key = myData$key, c1 = c1, c2 = c2, c3 = c3)

> mydata1
   Key c1 c2  c3
1    1  3 12 102
2    2  6 15 105
3    3  9 18 108
4    4 12 21 111
5    5 15 24 114
6    6 18 27 117
7    7 21 30 120
8    8 24 33 123
9    9 27 36 126
10  10 30 39 129