我想将多个文件(file1_10,file1_30,file1_50 ......)中的各列(value1、value2 等)分别乘以对应样本的计数总和。例如,对于file1_10,value1要乘以sample1_DO1_10 + sample1_DO2_10的计数之和,value2要乘以sample2_FX1_10 + sample2_FX2_10的计数之和。
>file1_10
value1 value2 value3
0 0 0
20 0 1
123 70 30
100 50 22
>file2_30
value1 value2 value3
20 20 30
100 11 12
13 0 20
100 0 0
>count
names counts
sample1_DO1_10 50
sample1_DO1_30 200
sample1_DO2_10 30
sample1_DO2_30 221
sample2_FX1_10 33
sample2_FX1_30 101
sample2_FX2_10 76
sample2_FX2_30 204
sample3_XX1_10 50
sample3_XX1_30 100
sample3_XX2_10 80
sample3_XX2_30 200
代码
# Define 3 new columns (one per sample) in file1_10 and file2_30.
# Each new_valueN is valueN multiplied by the summed counts of that sample's
# two replicates at the file's suffix (_10 or _30).
# Row layout of `count`: sample1 -> rows 1-4, sample2 -> rows 5-8,
# sample3 -> rows 9-12; odd rows carry the _10 suffix, even rows _30.
# Assigning with `$<-` creates the column, so no NA pre-initialisation needed.

# file1_10: use the _10 counts
file1_10$new_value1 <- file1_10$value1 * sum(count[c(1, 3), 2])
file1_10$new_value2 <- file1_10$value2 * sum(count[c(5, 7), 2])
file1_10$new_value3 <- file1_10$value3 * sum(count[c(9, 11), 2])

# file2_30: use the _30 counts
file2_30$new_value1 <- file2_30$value1 * sum(count[c(2, 4), 2])
file2_30$new_value2 <- file2_30$value2 * sum(count[c(6, 8), 2])
file2_30$new_value3 <- file2_30$value3 * sum(count[c(10, 12), 2])
我想将上面的代码实现为R函数,因为我有很多样本文件,每个样本文件都有2列以上(value1 value2 value3 ....)。
谢谢
答案 0 :(得分:0)
你可以尝试:
# Collect the per-file data frames by name. The pattern is anchored so that
# unrelated objects merely containing "file" in their name are not picked up.
ls1 <- ls(pattern = "^file")
ls1
#[1] "file1_10" "file2_30"
# The suffix after the last underscore ("10", "30", ...) identifies which rows
# of `count` belong to each file.
pat <- gsub(".*\\_", "", ls1)
library(stringr)
# For every file, append one new_<column> per original column: the column
# multiplied by the summed counts of the matching sample (1st column ->
# lowest sample number, 2nd column -> next, ...). Returns a list of data
# frames in the same order as `ls1`.
res <- lapply(seq_along(pat), function(i) {
  x1 <- get(ls1[i])
  sample_names <- as.character(count$names)
  # Match the suffix only at the very end of the name; an unanchored
  # grep(pat[i], ...) would also hit names such as "sample10_DO1_30" or
  # "sample1_DO1_100" when looking for "10".
  indx <- which(endsWith(sample_names, paste0("_", pat[i])))
  # The first run of digits in each name is the sample number (sample1 -> 1).
  indx1 <- as.numeric(str_extract(sample_names, "\\d+"))
  # One list element per sample, holding the row indices of its replicates.
  lst2 <- split(indx, indx1[indx])
  if (length(lst2) != ncol(x1)) {
    stop(
      "'", ls1[i], "' has ", ncol(x1), " columns but ", length(lst2),
      " sample groups were found in 'count' for suffix '", pat[i], "'",
      call. = FALSE
    )
  }
  # x1[j] keeps the column as a one-column data frame so cbind() below
  # assembles a data frame that can be renamed in one step.
  new_cols <- lapply(seq_along(lst2), function(j) {
    x1[j] * sum(count[lst2[[j]], 2])
  })
  cbind(x1, setNames(do.call(cbind, new_cols), paste0("new_", colnames(x1))))
})
res
#[[1]]
# value1 value2 value3 new_value1 new_value2 new_value3
#1 0 0 0 0 0 0
#2 20 0 1 1600 0 130
#3 123 70 30 9840 7630 3900
#4 100 50 22 8000 5450 2860
#[[2]]
# value1 value2 value3 new_value1 new_value2 new_value3
#1 20 20 30 8420 6100 9000
#2 100 11 12 42100 3355 3600
#3 13 0 20 5473 0 6000
#4 100 0 0 42100 0 0