用于基于ID对列执行算术运算的函数

时间:2014-08-12 11:21:14

标签: r

我想将多个文件(file1_10,file1_30,file1_50 ......)中的列 value1、value2、value3 分别乘以对应样本的计数总和。例如,对于 file1_10:value1 乘以 sample1_DO1_10 + sample1_DO2_10,value2 乘以 sample2_FX1_10 + sample2_FX2_10,value3 乘以 sample3_XX1_10 + sample3_XX2_10。

>file1_10
value1 value2 value3
0        0     0
20       0     1 
123      70    30
100      50    22 

>file2_30
value1 value2 value3
20       20    30
100      11    12 
13       0     20
100      0     0

>count 
names       counts
sample1_DO1_10  50
sample1_DO1_30  200
sample1_DO2_10  30
sample1_DO2_30  221
sample2_FX1_10  33
sample2_FX1_30  101
sample2_FX2_10  76
sample2_FX2_30  204
sample3_XX1_10  50
sample3_XX1_30  100
sample3_XX2_10  80
sample3_XX2_30  200

代码

# Add three scaled columns to each table: new_valueK is valueK multiplied by
# the summed counts of the two `count` rows that belong to sample K at the
# table's suffix (_10 for file1_10, _30 for file2_30).
#
# Row positions refer to the `count` table as listed:
#   _10 entries: rows 1/3 (sample1), 5/7 (sample2), 9/11 (sample3)
#   _30 entries: rows 2/4 (sample1), 6/8 (sample2), 10/12 (sample3)
#
# Assigning through `$` creates the new columns, so no NA placeholders are
# required beforehand.
file1_10$new_value1 <- file1_10$value1 * sum(count[c(1, 3), 2])
file1_10$new_value2 <- file1_10$value2 * sum(count[c(5, 7), 2])
file1_10$new_value3 <- file1_10$value3 * sum(count[c(9, 11), 2])
file2_30$new_value1 <- file2_30$value1 * sum(count[c(2, 4), 2])
file2_30$new_value2 <- file2_30$value2 * sum(count[c(6, 8), 2])
file2_30$new_value3 <- file2_30$value3 * sum(count[c(10, 12), 2])

我想将上面的代码实现为R函数,因为我有很多样本文件,每个样本文件都有2列以上(value1 value2 value3 ....)。

谢谢

1 个答案:

答案 0 :(得分:0)

你可以尝试:

# Append count-scaled columns to one table.
#
# x1:     data frame of value columns (value1, value2, ...).
# suffix: text after the last underscore of the table's name, e.g. "10".
# count:  data frame with sample names in `names` and counts in column 2.
#
# Returns x1 with one extra "new_<column>" per original column. Column k is
# multiplied by the summed counts of the k-th sample (samples taken in
# numeric order) among the rows of `count` whose name ends in "_<suffix>".
# Stops with an error when the number of samples found differs from the
# number of columns in x1.
add_count_scaled_columns <- function(x1, suffix, count) {
  sample_names <- as.character(count$names)
  # Match "_<suffix>" at the END of the name only; a plain substring search
  # for "10" would also pick up names such as "sample10_XX1_30".
  rows <- which(endsWith(sample_names, paste0("_", suffix)))
  matched <- sample_names[rows]
  # The first run of digits in a name is its sample number (NA if none).
  hit <- regexpr("[0-9]+", matched)
  sample_id <- rep(NA_real_, length(matched))
  sample_id[hit > 0] <- as.numeric(regmatches(matched, hit))
  groups <- split(rows, sample_id)
  if (length(groups) != ncol(x1)) {
    stop("found ", length(groups), " sample group(s) for suffix '", suffix,
         "' but the table has ", ncol(x1), " column(s)", call. = FALSE)
  }
  # Pair column k with sample group k and scale by that group's total count.
  scaled <- Map(function(column, idx) column * sum(count[idx, 2]), x1, groups)
  cbind(x1, setNames(as.data.frame(scaled), paste0("new_", colnames(x1))))
}

# Names of the per-suffix tables in the workspace. The pattern is anchored so
# that unrelated objects that merely contain "file" are not picked up.
ls1 <- ls(pattern = "^file.*_[0-9]+$")
ls1
#[1] "file1_10" "file2_30"
# Suffix after the last underscore of each table name ("10", "30").
pat <- sub(".*_", "", ls1)
res <- lapply(seq_along(ls1), function(i) {
  add_count_scaled_columns(get(ls1[i]), pat[i], count)
})
res
 #[[1]]
 #   value1 value2 value3 new_value1 new_value2 new_value3
 #1      0      0      0          0          0          0   
 #2     20      0      1       1600          0        130
 #3    123     70     30       9840       7630       3900
 #4    100     50     22       8000       5450       2860

#[[2]]
#   value1 value2 value3 new_value1 new_value2 new_value3
#1     20     20     30       8420       6100       9000
#2    100     11     12      42100       3355       3600
#3     13      0     20       5473          0       6000
#4    100      0      0      42100          0          0