我有一个如下所示的数据框:
# Slice (rows 1:20) of the full data frame, written out as a structure() literal.
# Columns: Name, conc_factor, peptide_factor, serum_factor (all factors) and
# mean_fluorescence (numeric). The factor level sets are those of the full
# data, so they contain more levels than the 20 rows actually use.
quant <- structure(list(Name = structure(c(158L, 159L, 160L, 161L, 162L,
163L, 164L, 165L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 98L,
99L, 100L, 101L), .Label = c("abc_02_NEHC_025_100_A", "abc_02_NEHC_025_100_B",
"abc_02_NEHC_025_100_C", "abc_02_NEHC_025_100_D", "abc_02_NEHC_025_100_E",
"abc_02_NEHC_025_100_F", "abc_02_NEHC_025_100_G", "abc_02_NEHC_025_100_H",
"abc_02_NEHC_05_100_A", "abc_02_NEHC_05_100_B", "abc_02_NEHC_05_100_C",
"abc_02_NEHC_05_100_D", "abc_02_NEHC_05_100_E", "abc_02_NEHC_05_100_F",
"abc_02_NEHC_05_100_G", "abc_02_NEHC_05_100_H", "abc_02_NEHC_100_1_A",
"abc_02_NEHC_100_1_B", "abc_02_NEHC_100_1_C", "abc_02_NEHC_100_1_D",
"abc_02_NEHC_100_1_E", "abc_02_NEHC_100_1_F", "abc_02_NEHC_100_1_G",
"abc_02_NEHC_100_1_H", "abc_02_VL_025_100_A", "abc_02_VL_025_100_B",
"abc_02_VL_025_100_C", "abc_02_VL_025_100_D", "abc_02_VL_025_100_E",
"abc_02_VL_025_100_F", "abc_02_VL_025_100_G", "abc_02_VL_025_100_H",
"abc_02_VL_05_100_A", "abc_02_VL_05_100_B", "abc_02_VL_05_100_C",
"abc_02_VL_05_100_D", "abc_02_VL_05_100_E", "abc_02_VL_05_100_F",
"abc_02_VL_05_100_G", "abc_02_VL_05_100_H", "abc_02_VL_1_100_A",
"abc_02_VL_1_100_B", "abc_02_VL_1_100_C", "abc_02_VL_1_100_D",
"abc_02_VL_1_100_E", "abc_02_VL_1_100_F", "abc_02_VL_1_100_G",
"abc_02_VL_1_100_H", "BACKGROUND_NEHC_0125_100_A", "BACKGROUND_NEHC_0125_100_B",
"BACKGROUND_NEHC_0125_100_C", "BACKGROUND_NEHC_0125_100_D", "BACKGROUND_NEHC_0125_100_E",
"BACKGROUND_NEHC_0125_100_F", "BACKGROUND_NEHC_0125_100_G", "BACKGROUND_NEHC_025_100_A",
"BACKGROUND_NEHC_025_100_B", "BACKGROUND_NEHC_025_100_C", "BACKGROUND_NEHC_025_100_D",
"BACKGROUND_NEHC_025_100_F", "BACKGROUND_NEHC_025_100_G", "BACKGROUND_NEHC_05_100_A",
"BACKGROUND_NEHC_05_100_B", "BACKGROUND_NEHC_05_100_C", "BACKGROUND_NEHC_05_100_D",
"BACKGROUND_NEHC_05_100_F", "BACKGROUND_NEHC_05_100_G", "BACKGROUND_NEHC_05_100_H",
"BACKGROUND_NEHC_1_100_A", "BACKGROUND_NEHC_1_100_B", "BACKGROUND_NEHC_1_100_C",
"BACKGROUND_NEHC_1_100_D", "BACKGROUND_NEHC_1_100_E", "BACKGROUND_NEHC_1_100_F",
"BACKGROUND_NEHC_1_100_G", "BACKGROUND_VL_0125_100_A", "BACKGROUND_VL_0125_100_B",
"BACKGROUND_VL_0125_100_C", "BACKGROUND_VL_0125_100_D", "BACKGROUND_VL_0125_100_E",
"BACKGROUND_VL_0125_100_F", "BACKGROUND_VL_025_100_A", "BACKGROUND_VL_025_100_B",
"BACKGROUND_VL_025_100_C", "BACKGROUND_VL_025_100_D", "BACKGROUND_VL_025_100_E",
"BACKGROUND_VL_025_100_F", "BACKGROUND_VL_025_100_G", "BACKGROUND_VL_025_100_H",
"BACKGROUND_VL_05_100_A", "BACKGROUND_VL_05_100_B", "BACKGROUND_VL_05_100_C",
"BACKGROUND_VL_05_100_D", "BACKGROUND_VL_05_100_E", "BACKGROUND_VL_05_100_F",
"BACKGROUND_VL_05_100_G", "BACKGROUND_VL_05_100_H", "BACKGROUND_VL_1_100_A",
"BACKGROUND_VL_1_100_B", "BACKGROUND_VL_1_100_C", "BACKGROUND_VL_1_100_D",
"BACKGROUND_VL_1_100_E", "BACKGROUND_VL_1_100_F", "BACKGROUND_VL_1_100_G",
"BACKGROUND_VL_1_100_H", "Epq_11_NEHC_0125_100_a", "Epq_11_NEHC_0125_100_B",
"Epq_11_NEHC_0125_100_C", "Epq_11_NEHC_0125_100_D", "Epq_11_NEHC_0125_100_E",
"Epq_11_NEHC_0125_100_F", "Epq_11_NEHC_0125_100_G", "Epq_11_NEHC_025_100_a",
"Epq_11_NEHC_025_100_B", "Epq_11_NEHC_025_100_C", "Epq_11_NEHC_025_100_D",
"Epq_11_NEHC_025_100_E", "Epq_11_NEHC_05_100_a", "Epq_11_NEHC_05_100_B",
"Epq_11_NEHC_05_100_C", "Epq_11_NEHC_05_100_D", "Epq_11_NEHC_05_100_E",
"Epq_11_NEHC_05_100_F", "Epq_11_NEHC_05_100_G", "Epq_11_NEHC_05_100_H",
"Epq_11_NEHC_1_100_a", "Epq_11_NEHC_1_100_B", "Epq_11_NEHC_1_100_C",
"Epq_11_NEHC_1_100_D", "Epq_11_NEHC_1_100_E", "Epq_11_NEHC_1_100_F",
"Epq_11_NEHC_1_100_G", "Epq_11_NEHC_1_100_H", "Epq_11_VL_0125_100_A",
"Epq_11_VL_0125_100_B", "Epq_11_VL_0125_100_C", "Epq_11_VL_0125_100_D",
"Epq_11_VL_0125_100_E", "Epq_11_VL_0125_100_F", "Epq_11_VL_0125_100_G",
"Epq_11_VL_0125_100_H", "Epq_11_VL_025_100_A", "Epq_11_VL_025_100_B",
"Epq_11_VL_025_100_C", "Epq_11_VL_025_100_D", "Epq_11_VL_025_100_E",
"Epq_11_VL_025_100_F", "Epq_11_VL_025_100_G", "Epq_11_VL_025_100_H",
"Epq_11_VL_05_100_A", "Epq_11_VL_05_100_B", "Epq_11_VL_05_100_C",
"Epq_11_VL_05_100_D", "Epq_11_VL_05_100_E", "Epq_11_VL_05_100_F",
"Epq_11_VL_05_100_G", "Epq_11_VL_05_100_H", "Epq_11_VL_1_100_A",
"Epq_11_VL_1_100_B", "Epq_11_VL_1_100_C", "Epq_11_VL_1_100_D",
"Epq_11_VL_1_100_E", "Epq_11_VL_1_100_F", "Epq_11_VL_1_100_G",
"Epq_11_VL_1_100_H"), class = "factor"), conc_factor = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("pep_0.125", "pep_0.25", "pep_0.5", "pep_1.0"
), class = "factor"), peptide_factor = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L), .Label = c("ABC", "Background", "EpQ_11"), class = "factor"),
serum_factor = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NEHC",
"VL"), class = "factor"), mean_fluorescence = c(65535, 65535,
65534.93359, 65535, 65535, 65535, 65535, 65535, 21322.06055,
22704.08594, 22546.32617, 21801.30664, 21668.2168, 22054.40234,
21621.54688, 21516.33984, 17760.80273, 17886.12891, 18382.7832,
17531.80273)), class = "data.frame", row.names = c(NA, -20L
), .Names = c("Name", "conc_factor", "peptide_factor", "serum_factor",
"mean_fluorescence"))
这实际上只是我完整数据框的一个切片(第 1:20 行)。为了让大家更好地了解完整的数据框,我把变量 conc_factor、peptide_factor 和 serum_factor 的水平(levels)贴在下面:
levels(quant$conc_factor)
[1] "pep_0.125" "pep_0.25" "pep_0.5" "pep_1.0"
levels(quant$peptide_factor)
[1] "ABC" "Background" "EpQ_11"
levels(quant$serum_factor)
[1] "NEHC" "VL"
使用以下命令:
# Average background fluorescence per conc_factor / serum_factor combination.
summary_backgrounds <- quant %>%
  # Keep only the background measurements.
  filter(peptide_factor == "Background") %>%
  # One group per concentration / serum pair.
  group_by(conc_factor, serum_factor) %>%
  # Collapse each group to the mean of its mean_fluorescence values.
  summarise(avg_fluorescence_grouped = mean(mean_fluorescence))
conc_factor serum_factor avg_fluorescence_grouped
<fctr> <fctr> <dbl>
1 pep_0.125 NEHC 18439.70
2 pep_0.125 VL 16985.60
3 pep_0.25 NEHC 18666.52
4 pep_0.25 VL 17577.98
5 pep_0.5 NEHC 18300.47
6 pep_0.5 VL 18010.99
7 pep_1.0 NEHC 16103.50
8 pep_1.0 VL 17710.50
这样我就得到了每种 conc_factor 和 serum_factor 组合下背景(Background)的 mean_fluorescence 平均值。
我现在想做的是:在数据框 quant 中添加一个新变量(名为 avg_fluorescence_minus_background),即根据对应的 conc_factor 和 serum_factor,从 quant$mean_fluorescence 的每个值中减去相应的背景值(summary_backgrounds$avg_fluorescence_grouped)。
例如,对于 quant[1, ],其 conc_factor == "pep_1.0" 且 serum_factor == "VL",因此我的结果应为 65535.00 - 17710.50 = 47824.5,其余各行依此类推。
答案 0 :(得分:2)
了解一下连接(join)操作,你会发现用它们很容易解决这类问题:
# Attach the matching background average to every row of quant (matched on
# conc_factor and serum_factor), then subtract it from mean_fluorescence.
# The whole pipeline is assigned back to quant so the new column is actually
# stored; the column carries the name requested in the question.
quant <- quant %>%
  left_join(summary_backgrounds, by = c("conc_factor", "serum_factor")) %>%
  mutate(
    avg_fluorescence_minus_background =
      mean_fluorescence - avg_fluorescence_grouped
  )
答案 1 :(得分:0)
您可以使用下面的代码。对于 peptide_factor != 'Background' 的行,它会在 avg_fluorescence_minus_background 列中填入 NA,并给出您所需的结果:
# Per (conc_factor, serum_factor, peptide_factor) group: compute the group mean
# of mean_fluorescence and, for Background rows only, the group mean minus the
# row's own value. All other rows receive NA.
# NOTE(review): the question's worked example subtracts the background mean
# from each row's value (65535.00 - 17710.50); this snippet does not do that
# for non-Background rows -- confirm which result is intended.
quant %>%
  group_by(conc_factor, serum_factor, peptide_factor) %>%
  mutate(
    avg_fluorescence_grouped = mean(mean_fluorescence),
    avg_fluorescence_minus_background = ifelse(
      peptide_factor == "Background",
      avg_fluorescence_grouped - mean_fluorescence,
      NA
    )
  ) %>%
  # Drop the helper column once the difference has been computed.
  select(-avg_fluorescence_grouped)
## # A tibble: 20 x 6
## # Groups: conc_factor, serum_factor, peptide_factor [3]
## conc_factor peptide_factor serum_factor mean_fluorescence avg_fluorescence_minus_background
## <fctr> <fctr> <fctr> <dbl> <dbl>
## 1 pep_1.0 EpQ_11 VL 65535.00 NA
## 2 pep_1.0 EpQ_11 VL 65535.00 NA
## 3 pep_1.0 EpQ_11 VL 65534.93 NA
## 4 pep_1.0 EpQ_11 VL 65535.00 NA
## 5 pep_1.0 EpQ_11 VL 65535.00 NA
## 6 pep_1.0 EpQ_11 VL 65535.00 NA
## 7 pep_1.0 EpQ_11 VL 65535.00 NA
## 8 pep_1.0 EpQ_11 VL 65535.00 NA
## 9 pep_1.0 ABC VL 21322.06 NA
## 10 pep_1.0 ABC VL 22704.09 NA
## 11 pep_1.0 ABC VL 22546.33 NA
## 12 pep_1.0 ABC VL 21801.31 NA
## 13 pep_1.0 ABC VL 21668.22 NA
## 14 pep_1.0 ABC VL 22054.40 NA
## 15 pep_1.0 ABC VL 21621.55 NA
## 16 pep_1.0 ABC VL 21516.34 NA
## 17 pep_1.0 Background VL 17760.80 129.576662
## 18 pep_1.0 Background VL 17886.13 4.250482
## 19 pep_1.0 Background VL 18382.78 -492.403808
## 20 pep_1.0 Background VL 17531.80 358.576662
# ... with 1 more variables: Name <fctr>