迭代减去两个数据帧的列值

时间:2017-11-21 14:18:33

标签: r

我有两个数据帧:

df.means.refs

enter image description here

df.target

enter image description here

我需要遍历df.target并进行以下减法:

df.target$Cq - df.means.refs$Cq

df.target 按 Tissue、Accession、Genotype、Gene、BReplicate 分组(group_by)。

df.means.refs 按 Tissue、Accession、Genotype、BReplicate 分组(group_by)。

例如我需要计算

df.target$Cq - df.means.refs$Cq (row 1),
df.target$Cq - df.means.refs$Cq (row 2),
df.target$Cq - df.means.refs$Cq (row 3),
到这里为止没有问题。但接下来还需要计算:

df.target$Cq - df.means.refs$Cq (row 4 in df.target - row1 in df.means.refs)
df.target$Cq - df.means.refs$Cq (row 5 in df.target - row2 in df.means.refs)
df.target$Cq - df.means.refs$Cq (row 6 in df.target - row3 in df.means.refs)

等等。

从 df.target 的第 10 行开始,Genotype 列变为 WT,此时需要改用 df.means.refs 中 Genotype 为 WT 的 Cq 值(第 4–6 行),并以同样的方式循环使用。

(简单来说:共有两个不同的基因型,每个基因型有 3 个生物学重复,每个重复对应一个 Cq 值;df.target 中列出了所测试的三个不同基因。)

如何在R中编码?

非常感谢你的帮助,凯

数据帧(通过 dput() 导出)—— df.means.refs:

# dput() dump of df.means.refs: 6 rows = 2 genotypes (sub-9, WT) x 3
# biological replicates, each with one reference Cq value.
# Stored as a grouped_df whose grouping variables are Tissue, Accession and
# Genotype. To rebuild it, assign the expression:
#   df.means.refs <- structure(...)
# NOTE(review): the `vars`/`drop` attributes look like the grouped_df layout of
# an older dplyr release -- confirm it loads under the installed version.
structure(list(Tissue = c("Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling"), Accession = c("Col", "Col", 
"Col", "Col", "Col", "Col"), Genotype = c("sub-9", "sub-9", "sub-9", 
"WT", "WT", "WT"), BReplicate = c("1", "2", "3", "1", "2", "3"
), Cq = c(25.2540053029395, 24.6386988176262, 24.5407237397682, 
24.7295032752289, 24.4544553518053, 24.4604738608338)), .Names = c("Tissue", 
"Accession", "Genotype", "BReplicate", "Cq"), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), vars = c("Tissue", 
"Accession", "Genotype"), drop = TRUE)

df.target:

# dput() dump of df.target: 18 rows = 2 genotypes (sub-9, WT) x 3 genes
# (CESA1, CESA3, PRC1) x 3 biological replicates, each with one Cq value.
# Stored as a grouped_df grouped by Tissue, Accession, Genotype, Gene and
# BReplicate; every group holds exactly one row (all group_sizes are 1L).
# To rebuild it, assign the expression:
#   df.target <- structure(...)
# NOTE(review): the `vars`/`indices`/`labels` attributes look like the
# grouped_df layout of an older dplyr release -- confirm it loads under the
# installed version.
structure(list(Tissue = c("Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling"), Accession = c("Col", "Col", 
"Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", 
"Col", "Col", "Col", "Col", "Col", "Col", "Col"), Genotype = c("sub-9", 
"sub-9", "sub-9", "sub-9", "sub-9", "sub-9", "sub-9", "sub-9", 
"sub-9", "WT", "WT", "WT", "WT", "WT", "WT", "WT", "WT", "WT"
), Gene = c("CESA1", "CESA1", "CESA1", "CESA3", "CESA3", "CESA3", 
"PRC1", "PRC1", "PRC1", "CESA1", "CESA1", "CESA1", "CESA3", "CESA3", 
"CESA3", "PRC1", "PRC1", "PRC1"), BReplicate = c("1", "2", "3", 
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", 
"2", "3"), Cq = c(23.496195267366, 22.7054342062343, 22.639685306532, 
22.8211127310626, 22.0820545565921, 22.1110378642623, 24.1462601520338, 
23.3972522049923, 23.3270831096319, 22.5998036632355, 22.7161277680243, 
22.6526346162252, 22.237990186265, 22.0715318793714, 22.0171712171306, 
23.4524362896598, 23.4121887867123, 23.3648625264175)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -18L), .Names = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate", "Cq"), vars = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate"), drop = TRUE, indices = list(
    0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
    14L, 15L, 16L, 17L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
    Tissue = c("Seedling", "Seedling", "Seedling", "Seedling", 
    "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
    "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
    "Seedling", "Seedling", "Seedling", "Seedling"), Accession = c("Col", 
    "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", 
    "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col"), 
    Genotype = c("sub-9", "sub-9", "sub-9", "sub-9", "sub-9", 
    "sub-9", "sub-9", "sub-9", "sub-9", "WT", "WT", "WT", "WT", 
    "WT", "WT", "WT", "WT", "WT"), Gene = c("CESA1", "CESA1", 
    "CESA1", "CESA3", "CESA3", "CESA3", "PRC1", "PRC1", "PRC1", 
    "CESA1", "CESA1", "CESA1", "CESA3", "CESA3", "CESA3", "PRC1", 
    "PRC1", "PRC1"), BReplicate = c("1", "2", "3", "1", "2", 
    "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", 
    "3")), class = "data.frame", row.names = c(NA, -18L), vars = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate"), drop = TRUE, .Names = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate")))

1 个答案:

答案 0 :(得分:0)

我想您需要的是按 Tissue、Accession、Genotype 和 BReplicate 这四列对两个数据集进行连接(join)

使用dplyr

library(dplyr)

# For each target row, look up the reference row that shares its Tissue,
# Accession, Genotype and BReplicate, then subtract the reference Cq from the
# target Cq. Both tables have a `Cq` column, so the joined result carries them
# as `Cq.x` (from df.target) and `Cq.y` (from df.means.refs).
df.target %>%
  left_join(
    df.means.refs,
    by = c("Tissue", "Accession", "Genotype", "BReplicate")
  ) %>%
  mutate(diff = Cq.x - Cq.y)
# A tibble: 18 x 8
# Groups:   Tissue, Accession, Genotype, Gene, BReplicate [18]
     Tissue Accession Genotype  Gene BReplicate     Cq.x     Cq.y      diff
      <chr>     <chr>    <chr> <chr>      <chr>    <dbl>    <dbl>     <dbl>
 1 Seedling       Col    sub-9 CESA1          1 23.49620 25.25401 -1.757810
 2 Seedling       Col    sub-9 CESA1          2 22.70543 24.63870 -1.933265
 3 Seedling       Col    sub-9 CESA1          3 22.63969 24.54072 -1.901038
 4 Seedling       Col    sub-9 CESA3          1 22.82111 25.25401 -2.432893
 5 Seedling       Col    sub-9 CESA3          2 22.08205 24.63870 -2.556644
 6 Seedling       Col    sub-9 CESA3          3 22.11104 24.54072 -2.429686
 7 Seedling       Col    sub-9  PRC1          1 24.14626 25.25401 -1.107745
 8 Seedling       Col    sub-9  PRC1          2 23.39725 24.63870 -1.241447
 9 Seedling       Col    sub-9  PRC1          3 23.32708 24.54072 -1.213641
10 Seedling       Col       WT CESA1          1 22.59980 24.72950 -2.129700
11 Seedling       Col       WT CESA1          2 22.71613 24.45446 -1.738328
12 Seedling       Col       WT CESA1          3 22.65263 24.46047 -1.807839
13 Seedling       Col       WT CESA3          1 22.23799 24.72950 -2.491513
14 Seedling       Col       WT CESA3          2 22.07153 24.45446 -2.382923
15 Seedling       Col       WT CESA3          3 22.01717 24.46047 -2.443303
16 Seedling       Col       WT  PRC1          1 23.45244 24.72950 -1.277067
17 Seedling       Col       WT  PRC1          2 23.41219 24.45446 -1.042267
18 Seedling       Col       WT  PRC1          3 23.36486 24.46047 -1.095611

使用data.table

library(data.table)
options(datatable.print.class = TRUE)  # print column classes under the header

# Convert both data frames to data.tables in place.
setDT(df.target)
setDT(df.means.refs)

# Update join: match rows on the four key columns and add `diff` to df.target
# by reference. `Cq` is df.target's column; the `i.` prefix selects the column
# of the same name from df.means.refs.
df.target[
  df.means.refs,
  on = .(Tissue, Accession, Genotype, BReplicate),
  diff := Cq - i.Cq
]

# `:=` returns invisibly, so an empty `[]` is used to print the updated table.
df.target[]
      Tissue Accession Genotype   Gene BReplicate       Cq      diff
      <char>    <char>   <char> <char>     <char>    <num>     <num>
 1: Seedling       Col    sub-9  CESA1          1 23.49620 -1.757810
 2: Seedling       Col    sub-9  CESA1          2 22.70543 -1.933265
 3: Seedling       Col    sub-9  CESA1          3 22.63969 -1.901038
 4: Seedling       Col    sub-9  CESA3          1 22.82111 -2.432893
 5: Seedling       Col    sub-9  CESA3          2 22.08205 -2.556644
 6: Seedling       Col    sub-9  CESA3          3 22.11104 -2.429686
 7: Seedling       Col    sub-9   PRC1          1 24.14626 -1.107745
 8: Seedling       Col    sub-9   PRC1          2 23.39725 -1.241447
 9: Seedling       Col    sub-9   PRC1          3 23.32708 -1.213641
10: Seedling       Col       WT  CESA1          1 22.59980 -2.129700
11: Seedling       Col       WT  CESA1          2 22.71613 -1.738328
12: Seedling       Col       WT  CESA1          3 22.65263 -1.807839
13: Seedling       Col       WT  CESA3          1 22.23799 -2.491513
14: Seedling       Col       WT  CESA3          2 22.07153 -2.382923
15: Seedling       Col       WT  CESA3          3 22.01717 -2.443303
16: Seedling       Col       WT   PRC1          1 23.45244 -1.277067
17: Seedling       Col       WT   PRC1          2 23.41219 -1.042267
18: Seedling       Col       WT   PRC1          3 23.36486 -1.095611