假设我有下表:
数据
# Example input: two replicates, each holding one "ref" row followed by
# goi1/goi2 rows for WT_mock and then goi1/goi2 rows for WT_stim.
# Built with rep() instead of a literal dump; columns `replicate` and `mean`
# stay integer and the row names stay the character labels "1".."10".
df1 <- structure(
  list(
    sample = rep(rep(c("WT_mock", "WT_stim"), times = c(3L, 2L)), times = 2L),
    target = rep(c("ref", "goi1", "goi2", "goi1", "goi2"), times = 2L),
    replicate = rep(1:2, each = 5L),
    mean = rep(c(15L, 20L, 21L, 22L, 23L), times = 2L)
  ),
  class = "data.frame",
  row.names = as.character(1:10)
)
# A tibble: 10 x 4
sample target replicate mean
<chr> <chr> <dbl> <dbl>
1 WT_mock ref 1 15
2 WT_mock goi1 1 20
3 WT_mock goi2 1 21
4 WT_stim goi1 1 22
5 WT_stim goi2 1 23
6 WT_mock ref 2 15
7 WT_mock goi1 2 20
8 WT_mock goi2 2 21
9 WT_stim goi1 2 22
10 WT_stim goi2 2 23
我想用参照(WT_mock,ref)的平均值减去每个goi的平均值,得到如下结果:
# A tibble: 10 x 5
sample target replicate mean dCt
<chr> <chr> <dbl> <dbl> <dbl>
1 WT_mock ref 1 15 0
2 WT_mock goi1 1 20 -5
3 WT_mock goi2 1 21 -6
4 WT_stim goi1 1 22 -7
5 WT_stim goi2 1 23 -8
6 WT_mock ref 2 15 0
7 WT_mock goi1 2 20 -5
8 WT_mock goi2 2 21 -6
9 WT_stim goi1 2 22 -7
10 WT_stim goi2 2 23 -8
我知道有几个 Bioconductor 包可以做到这一点,但是我想以此作为练习,使用R进行数据操作。
我目前的方法是对每个重复(replicate)分别使用 filter,然后“手动”减去ref值,但是我想应该有一种更优雅的方法来解决此问题(例如 dplyr 等)。
我的真实数据集有多个重复样本,我想知道是否有更快的方法来实现这一目标。
非常感谢!
答案 0 :(得分:2)
按 replicate 分组后,用 target 为 'ref' 的那一行的 mean 值减去各行的 mean,来创建 dCt 列
library(dplyr)
# Within each replicate, take the mean of the row whose target is "ref" and
# subtract every row's mean from it. ungroup() is applied at the end so the
# result is a plain tibble and later verbs are not silently run per replicate.
df1 %>%
  group_by(replicate) %>%
  mutate(dCt = mean[target == "ref"] - mean) %>%
  ungroup()
# A tibble: 10 x 5
# sample target replicate mean dCt
# <chr> <chr> <int> <int> <int>
# 1 WT_mock ref 1 15 0
# 2 WT_mock goi1 1 20 -5
# 3 WT_mock goi2 1 21 -6
# 4 WT_stim goi1 1 22 -7
# 5 WT_stim goi2 1 23 -8
# 6 WT_mock ref 2 15 0
# 7 WT_mock goi1 2 20 -5
# 8 WT_mock goi2 2 21 -6
# 9 WT_stim goi1 2 22 -7
#10 WT_stim goi2 2 23 -8
或使用match
# Same computation, but match() picks the position of the first "ref" row in
# each replicate. ungroup() drops the grouping so the returned tibble does not
# carry it into later steps.
df1 %>%
  group_by(replicate) %>%
  mutate(dCt = mean[match("ref", target)] - mean) %>%
  ungroup()
或使用data.table
library(data.table)
# Convert df1 to a data.table, then add dCt with `:=`: per replicate, the mean
# of the first "ref" row minus each row's mean.
setDT(df1)
df1[, dCt := mean[match("ref", target)] - mean, by = replicate]
在 base R 中,我们也可以这样做:
# Look up each row's reference mean by its replicate label instead of using
# `replicate` as a positional index. This stays correct when replicate IDs are
# not exactly 1..k or when rows are not sorted by replicate; a replicate with
# no "ref" row yields NA.
df1$dCt <- with(df1, {
  is_ref <- target == "ref"
  mean[is_ref][match(replicate, replicate[is_ref])] - mean
})
# Input data for the snippets in this answer: two replicates, each holding one
# "ref" row followed by goi1/goi2 rows for WT_mock and then for WT_stim.
# Columns `replicate` and `mean` are integer; row names are "1".."10".
df1 <- structure(
  list(
    sample = rep(rep(c("WT_mock", "WT_stim"), times = c(3L, 2L)), times = 2L),
    target = rep(c("ref", "goi1", "goi2", "goi1", "goi2"), times = 2L),
    replicate = rep(1:2, each = 5L),
    mean = rep(c(15L, 20L, 21L, 22L, 23L), times = 2L)
  ),
  class = "data.frame",
  row.names = as.character(1:10)
)
答案 1 :(得分:2)
以下是一些 base R 的解决方案(代码中的 df 即问题中的数据框 df1):
如果 ref 始终是各个组的第一行,也许您可以尝试以下代码:
df <- within(df,dCt<-ave(mean,replicate,FUN = function(x) head(x,1)-x))
如果 ref 不是相应组中的第一行,可以使用以下代码:
df <- do.call(rbind,c(make.row.names = FALSE,
lapply(split(df,df$replicate),
function(v) within(v,dCt<-mean[target=="ref"]-mean))))
以上两种都会给你
> df
sample target replicate mean dCt
1 WT_mock ref 1 15 0
2 WT_mock goi1 1 20 -5
3 WT_mock goi2 1 21 -6
4 WT_stim goi1 1 22 -7
5 WT_stim goi2 1 23 -8
6 WT_mock ref 2 15 0
7 WT_mock goi1 2 20 -5
8 WT_mock goi2 2 21 -6
9 WT_stim goi1 2 22 -7
10 WT_stim goi2 2 23 -8