dplyr - 根据来自两个不同数据帧的条件进行减法

时间:2017-12-04 14:11:18

标签: r dplyr

我有一个如下所示的数据框:

# Example data: a 20-row data frame with the columns Name, conc_factor,
# peptide_factor, serum_factor and mean_fluorescence.
# The factor columns carry more levels than these rows use: every row here
# has conc_factor code 4L ("pep_1.0") and serum_factor code 2L ("VL"), while
# peptide_factor covers "EpQ_11" (rows 1-8), "ABC" (rows 9-16) and
# "Background" (rows 17-20).
quant <- structure(list(Name = structure(c(158L, 159L, 160L, 161L, 162L, 
163L, 164L, 165L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 98L, 
99L, 100L, 101L), .Label = c("abc_02_NEHC_025_100_A", "abc_02_NEHC_025_100_B", 
"abc_02_NEHC_025_100_C", "abc_02_NEHC_025_100_D", "abc_02_NEHC_025_100_E", 
"abc_02_NEHC_025_100_F", "abc_02_NEHC_025_100_G", "abc_02_NEHC_025_100_H", 
"abc_02_NEHC_05_100_A", "abc_02_NEHC_05_100_B", "abc_02_NEHC_05_100_C", 
"abc_02_NEHC_05_100_D", "abc_02_NEHC_05_100_E", "abc_02_NEHC_05_100_F", 
"abc_02_NEHC_05_100_G", "abc_02_NEHC_05_100_H", "abc_02_NEHC_100_1_A", 
"abc_02_NEHC_100_1_B", "abc_02_NEHC_100_1_C", "abc_02_NEHC_100_1_D", 
"abc_02_NEHC_100_1_E", "abc_02_NEHC_100_1_F", "abc_02_NEHC_100_1_G", 
"abc_02_NEHC_100_1_H", "abc_02_VL_025_100_A", "abc_02_VL_025_100_B", 
"abc_02_VL_025_100_C", "abc_02_VL_025_100_D", "abc_02_VL_025_100_E", 
"abc_02_VL_025_100_F", "abc_02_VL_025_100_G", "abc_02_VL_025_100_H", 
"abc_02_VL_05_100_A", "abc_02_VL_05_100_B", "abc_02_VL_05_100_C", 
"abc_02_VL_05_100_D", "abc_02_VL_05_100_E", "abc_02_VL_05_100_F", 
"abc_02_VL_05_100_G", "abc_02_VL_05_100_H", "abc_02_VL_1_100_A", 
"abc_02_VL_1_100_B", "abc_02_VL_1_100_C", "abc_02_VL_1_100_D", 
"abc_02_VL_1_100_E", "abc_02_VL_1_100_F", "abc_02_VL_1_100_G", 
"abc_02_VL_1_100_H", "BACKGROUND_NEHC_0125_100_A", "BACKGROUND_NEHC_0125_100_B", 
"BACKGROUND_NEHC_0125_100_C", "BACKGROUND_NEHC_0125_100_D", "BACKGROUND_NEHC_0125_100_E", 
"BACKGROUND_NEHC_0125_100_F", "BACKGROUND_NEHC_0125_100_G", "BACKGROUND_NEHC_025_100_A", 
"BACKGROUND_NEHC_025_100_B", "BACKGROUND_NEHC_025_100_C", "BACKGROUND_NEHC_025_100_D", 
"BACKGROUND_NEHC_025_100_F", "BACKGROUND_NEHC_025_100_G", "BACKGROUND_NEHC_05_100_A", 
"BACKGROUND_NEHC_05_100_B", "BACKGROUND_NEHC_05_100_C", "BACKGROUND_NEHC_05_100_D", 
"BACKGROUND_NEHC_05_100_F", "BACKGROUND_NEHC_05_100_G", "BACKGROUND_NEHC_05_100_H", 
"BACKGROUND_NEHC_1_100_A", "BACKGROUND_NEHC_1_100_B", "BACKGROUND_NEHC_1_100_C", 
"BACKGROUND_NEHC_1_100_D", "BACKGROUND_NEHC_1_100_E", "BACKGROUND_NEHC_1_100_F", 
"BACKGROUND_NEHC_1_100_G", "BACKGROUND_VL_0125_100_A", "BACKGROUND_VL_0125_100_B", 
"BACKGROUND_VL_0125_100_C", "BACKGROUND_VL_0125_100_D", "BACKGROUND_VL_0125_100_E", 
"BACKGROUND_VL_0125_100_F", "BACKGROUND_VL_025_100_A", "BACKGROUND_VL_025_100_B", 
"BACKGROUND_VL_025_100_C", "BACKGROUND_VL_025_100_D", "BACKGROUND_VL_025_100_E", 
"BACKGROUND_VL_025_100_F", "BACKGROUND_VL_025_100_G", "BACKGROUND_VL_025_100_H", 
"BACKGROUND_VL_05_100_A", "BACKGROUND_VL_05_100_B", "BACKGROUND_VL_05_100_C", 
"BACKGROUND_VL_05_100_D", "BACKGROUND_VL_05_100_E", "BACKGROUND_VL_05_100_F", 
"BACKGROUND_VL_05_100_G", "BACKGROUND_VL_05_100_H", "BACKGROUND_VL_1_100_A", 
"BACKGROUND_VL_1_100_B", "BACKGROUND_VL_1_100_C", "BACKGROUND_VL_1_100_D", 
"BACKGROUND_VL_1_100_E", "BACKGROUND_VL_1_100_F", "BACKGROUND_VL_1_100_G", 
"BACKGROUND_VL_1_100_H", "Epq_11_NEHC_0125_100_a", "Epq_11_NEHC_0125_100_B", 
"Epq_11_NEHC_0125_100_C", "Epq_11_NEHC_0125_100_D", "Epq_11_NEHC_0125_100_E", 
"Epq_11_NEHC_0125_100_F", "Epq_11_NEHC_0125_100_G", "Epq_11_NEHC_025_100_a", 
"Epq_11_NEHC_025_100_B", "Epq_11_NEHC_025_100_C", "Epq_11_NEHC_025_100_D", 
"Epq_11_NEHC_025_100_E", "Epq_11_NEHC_05_100_a", "Epq_11_NEHC_05_100_B", 
"Epq_11_NEHC_05_100_C", "Epq_11_NEHC_05_100_D", "Epq_11_NEHC_05_100_E", 
"Epq_11_NEHC_05_100_F", "Epq_11_NEHC_05_100_G", "Epq_11_NEHC_05_100_H", 
"Epq_11_NEHC_1_100_a", "Epq_11_NEHC_1_100_B", "Epq_11_NEHC_1_100_C", 
"Epq_11_NEHC_1_100_D", "Epq_11_NEHC_1_100_E", "Epq_11_NEHC_1_100_F", 
"Epq_11_NEHC_1_100_G", "Epq_11_NEHC_1_100_H", "Epq_11_VL_0125_100_A", 
"Epq_11_VL_0125_100_B", "Epq_11_VL_0125_100_C", "Epq_11_VL_0125_100_D", 
"Epq_11_VL_0125_100_E", "Epq_11_VL_0125_100_F", "Epq_11_VL_0125_100_G", 
"Epq_11_VL_0125_100_H", "Epq_11_VL_025_100_A", "Epq_11_VL_025_100_B", 
"Epq_11_VL_025_100_C", "Epq_11_VL_025_100_D", "Epq_11_VL_025_100_E", 
"Epq_11_VL_025_100_F", "Epq_11_VL_025_100_G", "Epq_11_VL_025_100_H", 
"Epq_11_VL_05_100_A", "Epq_11_VL_05_100_B", "Epq_11_VL_05_100_C", 
"Epq_11_VL_05_100_D", "Epq_11_VL_05_100_E", "Epq_11_VL_05_100_F", 
"Epq_11_VL_05_100_G", "Epq_11_VL_05_100_H", "Epq_11_VL_1_100_A", 
"Epq_11_VL_1_100_B", "Epq_11_VL_1_100_C", "Epq_11_VL_1_100_D", 
"Epq_11_VL_1_100_E", "Epq_11_VL_1_100_F", "Epq_11_VL_1_100_G", 
"Epq_11_VL_1_100_H"), class = "factor"), conc_factor = structure(c(4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L), .Label = c("pep_0.125", "pep_0.25", "pep_0.5", "pep_1.0"
), class = "factor"), peptide_factor = structure(c(3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L), .Label = c("ABC", "Background", "EpQ_11"), class = "factor"), 
    serum_factor = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NEHC", 
    "VL"), class = "factor"), mean_fluorescence = c(65535, 65535, 
    65534.93359, 65535, 65535, 65535, 65535, 65535, 21322.06055, 
    22704.08594, 22546.32617, 21801.30664, 21668.2168, 22054.40234, 
    21621.54688, 21516.33984, 17760.80273, 17886.12891, 18382.7832, 
    17531.80273)), class = "data.frame", row.names = c(NA, -20L
), .Names = c("Name", "conc_factor", "peptide_factor", "serum_factor", 
"mean_fluorescence"))

这实际上只是我完整数据框的一个切片(第 1 到 20 行)。为了让大家更好地了解完整的数据框,下面列出变量 conc_factor、peptide_factor 和 serum_factor 的水平(levels):

levels(quant$conc_factor)
[1] "pep_0.125" "pep_0.25"  "pep_0.5"   "pep_1.0"  
levels(quant$peptide_factor)
[1] "ABC"        "Background" "EpQ_11"    
levels(quant$serum_factor)
[1] "NEHC" "VL"  

使用以下命令:

# Mean fluorescence of the Background rows, one value per
# conc_factor / serum_factor combination.
summary_backgrounds <- quant %>%
  filter(peptide_factor == "Background") %>%
  group_by(conc_factor, serum_factor) %>%
  summarise(avg_fluorescence_grouped = mean(mean_fluorescence))

  conc_factor serum_factor avg_fluorescence_grouped
       <fctr>       <fctr>                    <dbl>
1   pep_0.125         NEHC                 18439.70
2   pep_0.125           VL                 16985.60
3    pep_0.25         NEHC                 18666.52
4    pep_0.25           VL                 17577.98
5     pep_0.5         NEHC                 18300.47
6     pep_0.5           VL                 18010.99
7     pep_1.0         NEHC                 16103.50
8     pep_1.0           VL                 17710.50

这样我就得到了每个 conc_factor 和 serum_factor 组合下背景的 mean_fluorescence 平均值。我现在想做的是:在数据框 quant 中添加一个新变量(名为 avg_fluorescence_minus_background),即从 quant$mean_fluorescence 的每个值中减去对应的背景值(summary_backgrounds$avg_fluorescence_grouped,按 conc_factor 和 serum_factor 匹配)。

例如,对于 quant[1, ],其 conc_factor == "pep_1.0" 且 serum_factor == "VL",所以结果应为 65535.00 - 17710.50 = 47824.5。其余各行依此类推。

2 个答案:

答案 0 :(得分:2)

了解一下联接(join),你会发现用它很容易解决这类问题:

# Attach the matching background average to every row (matched on
# conc_factor and serum_factor), then subtract it from each measurement.
# The whole pipeline is assigned back to `quant`; without the assignment the
# mutate() result would only be printed and the new column would be lost.
# The helper column avg_fluorescence_grouped is kept in the result.
quant <- quant %>%
  left_join(summary_backgrounds, by = c("conc_factor", "serum_factor")) %>%
  mutate(
    avg_fluorescence_minus_background =
      mean_fluorescence - avg_fluorescence_grouped
  )

答案 1 :(得分:0)

您可以使用以下代码。它会在 peptide_factor != 'Background' 的行上将列 avg_fluorescence_minus_background 设为 NA,并在其余行上给出所需的结果:

# Within each conc_factor / serum_factor / peptide_factor group, give every
# "Background" row its group mean minus its own mean_fluorescence; rows of
# any other peptide get NA. The result stays grouped.
quant %>%
  group_by(conc_factor, serum_factor, peptide_factor) %>%
  mutate(
    avg_fluorescence_minus_background = ifelse(
      peptide_factor == "Background",
      mean(mean_fluorescence) - mean_fluorescence,
      NA
    )
  )

## # A tibble: 20 x 6
## # Groups:   conc_factor, serum_factor, peptide_factor [3]
##    conc_factor peptide_factor serum_factor mean_fluorescence avg_fluorescence_minus_background
##         <fctr>         <fctr>       <fctr>             <dbl>                          <dbl>
##  1     pep_1.0         EpQ_11           VL          65535.00                             NA
##  2     pep_1.0         EpQ_11           VL          65535.00                             NA
##  3     pep_1.0         EpQ_11           VL          65534.93                             NA
##  4     pep_1.0         EpQ_11           VL          65535.00                             NA
##  5     pep_1.0         EpQ_11           VL          65535.00                             NA
##  6     pep_1.0         EpQ_11           VL          65535.00                             NA
##  7     pep_1.0         EpQ_11           VL          65535.00                             NA
##  8     pep_1.0         EpQ_11           VL          65535.00                             NA
##  9     pep_1.0            ABC           VL          21322.06                             NA
## 10     pep_1.0            ABC           VL          22704.09                             NA
## 11     pep_1.0            ABC           VL          22546.33                             NA
## 12     pep_1.0            ABC           VL          21801.31                             NA
## 13     pep_1.0            ABC           VL          21668.22                             NA
## 14     pep_1.0            ABC           VL          22054.40                             NA
## 15     pep_1.0            ABC           VL          21621.55                             NA
## 16     pep_1.0            ABC           VL          21516.34                             NA
## 17     pep_1.0     Background           VL          17760.80                     129.576662
## 18     pep_1.0     Background           VL          17886.13                       4.250482
## 19     pep_1.0     Background           VL          18382.78                    -492.403808
## 20     pep_1.0     Background           VL          17531.80                     358.576662
# ... with 1 more variables: Name <fctr>