我有两个数据帧:df.means.refs 和 df.target(两者的 dput() 输出见下文)。
我需要遍历df.target并进行以下减法:
df.target$Cq - df.means.refs$Cq
df.target 按 Tissue、Accession、Genotype、Gene、BReplicate 分组(group_by)
df.means.refs 按 Tissue、Accession、Genotype、BReplicate 分组(group_by)
例如我需要计算
df.target$Cq - df.means.refs$Cq (row 1),
df.target$Cq - df.means.refs$Cq (row 2),
df.target$Cq - df.means.refs$Cq (row 3),
到目前为止没有问题。但现在
df.target$Cq - df.means.refs$Cq (row 4 in df.target - row1 in df.means.refs)
df.target$Cq - df.means.refs$Cq (row 5 in df.target - row2 in df.means.refs)
df.target$Cq - df.means.refs$Cq (row 6 in df.target - row3 in df.means.refs)
等等。
从 df.target 的第 10 行开始,Genotype 列变为 WT,此时需要改用 df.means.refs 中 Genotype 为 WT 的 Cq 值,并按同样的方式循环使用(即 df.target 第 10 行对应 df.means.refs 第 4 行,依此类推)。
(基本上,Cq 值对应每个基因型的 3 个生物学重复,共有两种不同的基因型;df.target 中列出了所测试的三个不同基因。)
如何在R中编码?
非常感谢你的帮助,凯
数据帧(通过 dput() 导出)。df.means.refs:
# dput() dump of df.means.refs: 6 rows with character columns Tissue,
# Accession, Genotype, BReplicate and numeric column Cq (one reference Cq per
# Genotype x BReplicate). Carries class "grouped_df" with grouping
# vars Tissue, Accession, Genotype.
# The expression is not assigned here; to reproduce, assign its value to
# df.means.refs.
# NOTE(review): the `vars`/`drop` attributes look like an older dplyr
# grouped_df layout -- confirm it loads cleanly under the installed dplyr.
structure(list(Tissue = c("Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling"), Accession = c("Col", "Col",
"Col", "Col", "Col", "Col"), Genotype = c("sub-9", "sub-9", "sub-9",
"WT", "WT", "WT"), BReplicate = c("1", "2", "3", "1", "2", "3"
), Cq = c(25.2540053029395, 24.6386988176262, 24.5407237397682,
24.7295032752289, 24.4544553518053, 24.4604738608338)), .Names = c("Tissue",
"Accession", "Genotype", "BReplicate", "Cq"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), vars = c("Tissue",
"Accession", "Genotype"), drop = TRUE)
df.target:
# dput() dump of df.target: 18 rows with character columns Tissue, Accession,
# Genotype, Gene, BReplicate and numeric column Cq (one Cq per
# Genotype x Gene x BReplicate). Carries class "grouped_df" with grouping
# vars Tissue, Accession, Genotype, Gene, BReplicate; every group holds
# exactly one row (group_sizes are all 1L).
# The expression is not assigned here; to reproduce, assign its value to
# df.target.
# NOTE(review): the `vars`/`indices`/`labels` attributes look like an older
# dplyr grouped_df layout -- confirm it loads cleanly under the installed dplyr.
structure(list(Tissue = c("Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling"), Accession = c("Col", "Col",
"Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col",
"Col", "Col", "Col", "Col", "Col", "Col", "Col"), Genotype = c("sub-9",
"sub-9", "sub-9", "sub-9", "sub-9", "sub-9", "sub-9", "sub-9",
"sub-9", "WT", "WT", "WT", "WT", "WT", "WT", "WT", "WT", "WT"
), Gene = c("CESA1", "CESA1", "CESA1", "CESA3", "CESA3", "CESA3",
"PRC1", "PRC1", "PRC1", "CESA1", "CESA1", "CESA1", "CESA3", "CESA3",
"CESA3", "PRC1", "PRC1", "PRC1"), BReplicate = c("1", "2", "3",
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1",
"2", "3"), Cq = c(23.496195267366, 22.7054342062343, 22.639685306532,
22.8211127310626, 22.0820545565921, 22.1110378642623, 24.1462601520338,
23.3972522049923, 23.3270831096319, 22.5998036632355, 22.7161277680243,
22.6526346162252, 22.237990186265, 22.0715318793714, 22.0171712171306,
23.4524362896598, 23.4121887867123, 23.3648625264175)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -18L), .Names = c("Tissue",
"Accession", "Genotype", "Gene", "BReplicate", "Cq"), vars = c("Tissue",
"Accession", "Genotype", "Gene", "BReplicate"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L), group_sizes = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Tissue = c("Seedling", "Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling",
"Seedling", "Seedling", "Seedling", "Seedling"), Accession = c("Col",
"Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col",
"Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col"),
Genotype = c("sub-9", "sub-9", "sub-9", "sub-9", "sub-9",
"sub-9", "sub-9", "sub-9", "sub-9", "WT", "WT", "WT", "WT",
"WT", "WT", "WT", "WT", "WT"), Gene = c("CESA1", "CESA1",
"CESA1", "CESA3", "CESA3", "CESA3", "PRC1", "PRC1", "PRC1",
"CESA1", "CESA1", "CESA1", "CESA3", "CESA3", "CESA3", "PRC1",
"PRC1", "PRC1"), BReplicate = c("1", "2", "3", "1", "2",
"3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2",
"3")), class = "data.frame", row.names = c(NA, -18L), vars = c("Tissue",
"Accession", "Genotype", "Gene", "BReplicate"), drop = TRUE, .Names = c("Tissue",
"Accession", "Genotype", "Gene", "BReplicate")))
答案 0 :(得分:0)
我想您需要的是按 Tissue、Accession、Genotype 和 BReplicate 对两个数据集进行连接(join)。
dplyr
library(dplyr)
# Attach the matching reference Cq to every target row, keyed on the four
# columns the two tables share. Because both tables have a Cq column, the
# join names them Cq.x (from df.target) and Cq.y (from df.means.refs).
# The result is printed, not assigned.
mutate(
  left_join(
    df.target,
    df.means.refs,
    by = c("Tissue", "Accession", "Genotype", "BReplicate")
  ),
  diff = Cq.x - Cq.y
)
# A tibble: 18 x 8
# Groups:   Tissue, Accession, Genotype, Gene, BReplicate [18]
   Tissue   Accession Genotype Gene  BReplicate Cq.x     Cq.y     diff
   <chr>    <chr>     <chr>    <chr> <chr>      <dbl>    <dbl>    <dbl>
 1 Seedling Col       sub-9    CESA1 1          23.49620 25.25401 -1.757810
 2 Seedling Col       sub-9    CESA1 2          22.70543 24.63870 -1.933265
 3 Seedling Col       sub-9    CESA1 3          22.63969 24.54072 -1.901038
 4 Seedling Col       sub-9    CESA3 1          22.82111 25.25401 -2.432893
 5 Seedling Col       sub-9    CESA3 2          22.08205 24.63870 -2.556644
 6 Seedling Col       sub-9    CESA3 3          22.11104 24.54072 -2.429686
 7 Seedling Col       sub-9    PRC1  1          24.14626 25.25401 -1.107745
 8 Seedling Col       sub-9    PRC1  2          23.39725 24.63870 -1.241447
 9 Seedling Col       sub-9    PRC1  3          23.32708 24.54072 -1.213641
10 Seedling Col       WT       CESA1 1          22.59980 24.72950 -2.129700
11 Seedling Col       WT       CESA1 2          22.71613 24.45446 -1.738328
12 Seedling Col       WT       CESA1 3          22.65263 24.46047 -1.807839
13 Seedling Col       WT       CESA3 1          22.23799 24.72950 -2.491513
14 Seedling Col       WT       CESA3 2          22.07153 24.45446 -2.382923
15 Seedling Col       WT       CESA3 3          22.01717 24.46047 -2.443303
16 Seedling Col       WT       PRC1  1          23.45244 24.72950 -1.277067
17 Seedling Col       WT       PRC1  2          23.41219 24.45446 -1.042267
18 Seedling Col       WT       PRC1  3          23.36486 24.46047 -1.095611
data.table
library(data.table)
options(datatable.print.class = TRUE)
# setDT() converts both data frames to data.tables in place (no copy).
setDT(df.target)
setDT(df.means.refs)
# Update join: for each df.target row matched on the four key columns, add
# `diff` by reference. Inside j, `Cq` is df.target's column and `i.Cq` is the
# matching column from df.means.refs.
df.target[
  df.means.refs,
  diff := Cq - i.Cq,
  on = .(Tissue, Accession, Genotype, BReplicate)
]
# `:=` returns invisibly, so an empty `[]` is used to print the updated table.
df.target[]
    Tissue   Accession Genotype Gene   BReplicate Cq       diff
    <char>   <char>    <char>   <char> <char>     <num>    <num>
 1: Seedling Col       sub-9    CESA1  1          23.49620 -1.757810
 2: Seedling Col       sub-9    CESA1  2          22.70543 -1.933265
 3: Seedling Col       sub-9    CESA1  3          22.63969 -1.901038
 4: Seedling Col       sub-9    CESA3  1          22.82111 -2.432893
 5: Seedling Col       sub-9    CESA3  2          22.08205 -2.556644
 6: Seedling Col       sub-9    CESA3  3          22.11104 -2.429686
 7: Seedling Col       sub-9    PRC1   1          24.14626 -1.107745
 8: Seedling Col       sub-9    PRC1   2          23.39725 -1.241447
 9: Seedling Col       sub-9    PRC1   3          23.32708 -1.213641
10: Seedling Col       WT       CESA1  1          22.59980 -2.129700
11: Seedling Col       WT       CESA1  2          22.71613 -1.738328
12: Seedling Col       WT       CESA1  3          22.65263 -1.807839
13: Seedling Col       WT       CESA3  1          22.23799 -2.491513
14: Seedling Col       WT       CESA3  2          22.07153 -2.382923
15: Seedling Col       WT       CESA3  3          22.01717 -2.443303
16: Seedling Col       WT       PRC1   1          23.45244 -1.277067
17: Seedling Col       WT       PRC1   2          23.41219 -1.042267
18: Seedling Col       WT       PRC1   3          23.36486 -1.095611