我有这样的数据(它只是我数据中的几行):
> dput(head(data_met))
structure(list(X = c(3L, 19L, 28L, 33L, 34L, 35L), Pathway = c(0L,
0L, 0L, 0L, 0L, 0L), Gene.name = structure(1:6, .Label = c("AT1G01090",
"AT1G02500", "AT1G03130", "AT1G03330", "AT1G03475", "AT1G03630",
"AT1G04410", "AT1G06410", "AT1G06570", "AT1G06680", "AT1G07370",
"AT1G07780", "AT1G08490"), class = "factor"), X1_1 = c(0, 0,
0, 0, 0, 0), X1_2 = c(0, 0, 0, 1.133759575, 0, 0), X1_3 = c(0,
1.389359906, 0, 0, 1, 0), X1_4 = c(0, 0.628125036, 0, 1.161302798,
1, 0), X1_5 = c(0, 1.231056083, 0, 0.50892594, 0.01594858, 0),
X1_6 = c(0.186450525, 1, 0, 0.514811996, 0, 0), X1_7 = c(0,
1.149134552, 0, 0, 0, 0), X1_8 = c(0.105799533, 0.386877662,
0, 0, 0, 0), X1_9 = c(0.763452684, 0.554554123, 0, 0, 0,
0), X1_10 = c(0.979400044, 0, 0, 0, 0, 0), X1_11 = c(1.120768885,
0.274641072, 0, 0, 0, 0.690696043), X1_12 = c(1, 0, 0, 0,
0, 0), X1_13 = c(1.276209448, 0, 0, 0, 0, 0), X1_14 = c(0.970143925,
0, 1, 0, 0, 0), X1_15 = c(0L, 0L, 0L, 0L, 0L, 0L), X1_16 = c(0.85529218,
0, 0, 0, 0, 1), X1_17 = c(0L, 0L, 0L, 0L, 0L, 0L), X1_18 = c(1.018244397,
0, 0, 0, 0, 0), X1_19 = c(2.138711024, 0, 0, 0, 0, 0), X1_20 = c(0L,
0L, 0L, 0L, 0L, 0L), X1_21 = c(1.554696031, 0, 0, 0, 0, 0
), X1_22 = c(0L, 0L, 0L, 0L, 0L, 0L), X1_23 = c(0L, 0L, 0L,
0L, 0L, 0L), X1_24 = c(1.681551744, 0, 1, 0, 0, 1.309303957
)), .Names = c("X", "Pathway", "Gene.name", "X1_1", "X1_2",
"X1_3", "X1_4", "X1_5", "X1_6", "X1_7", "X1_8", "X1_9", "X1_10",
"X1_11", "X1_12", "X1_13", "X1_14", "X1_15", "X1_16", "X1_17",
"X1_18", "X1_19", "X1_20", "X1_21", "X1_22", "X1_23", "X1_24"
), row.names = c(NA, 6L), class = "data.frame")
数据框中有一列名为 "Pathway",我希望用某个字符串("aa_metabolism")填充其中的一些行。需要填充的行所对应的基因名可以在下面这个向量中找到:
> dput(aa)
c("AT1G02500", "AT1G07780", "AT1G09780", "AT1G09795", "AT1G12230",
"AT1G13440", "AT1G14810", "AT1G15710", "AT1G16300", "AT1G17290",
"AT1G17745", "AT1G18500", "AT1G18640", "AT1G23310", "AT1G25220",
"AT1G31180", "AT1G31230", "AT1G31860", "AT1G32440", "AT1G48850",
"AT1G48860", "AT1G54100", "AT1G56190", "AT1G58080", "AT1G62800",
"AT1G65930", "AT1G70580", "AT1G71920", "AT1G72330", "AT1G72810",
"AT1G74030", "AT1G74040", "AT1G75330", "AT1G79550", "AT1G80560",
"AT1G80600", "AT2G01140", "AT2G01290", "AT2G04400", "AT2G05710",
"AT2G17130", "AT2G17265", "AT2G17630", "AT2G19940", "AT2G21170",
"AT2G21330", "AT2G22250", "AT2G22480", "AT2G27820", "AT2G29560",
"AT2G29690", "AT2G30970", "AT2G31810", "AT2G36230", "AT2G36460",
"AT2G36530", "AT2G36580", "AT2G36880", "AT2G37500", "AT2G39800",
"AT2G42790", "AT2G43090", "AT2G43100", "AT2G43750", "AT2G44040",
"AT2G44350", "AT2G45290", "AT2G45300", "AT2G45440", "AT3G01120",
"AT3G03780", "AT3G04120", "AT3G04520", "AT3G04790", "AT3G04940",
"AT3G06350", "AT3G07630", "AT3G08590", "AT3G09810", "AT3G10050",
"AT3G12780", "AT3G13110", "AT3G14390", "AT3G17390", "AT3G17820",
"AT3G19480", "AT3G19710", "AT3G22960", "AT3G23940", "AT3G29200",
"AT3G48560", "AT3G49680", "AT3G50520", "AT3G52930", "AT3G52990",
"AT3G53580", "AT3G54640", "AT3G55440", "AT3G55610", "AT3G57050",
"AT3G57560", "AT3G58610", "AT3G58990", "AT3G59760", "AT3G59890",
"AT3G60750", "AT3G60880", "AT3G61440", "AT4G01850", "AT4G02610",
"AT4G08870", "AT4G08900", "AT4G13430", "AT4G13930", "AT4G14880",
"AT4G17830", "AT4G19710", "AT4G23600", "AT4G24830", "AT4G26390",
"AT4G26530", "AT4G26900", "AT4G26970", "AT4G29220", "AT4G29840",
"AT4G31990", "AT4G32520", "AT4G33510", "AT4G33680", "AT4G34200",
"AT4G35260", "AT4G35630", "AT4G35830", "AT4G37670", "AT4G37930",
"AT4G38220", "AT4G38970", "AT4G39980", "AT5G03290", "AT5G05730",
"AT5G08570", "AT5G10870", "AT5G10920", "AT5G11520", "AT5G11880",
"AT5G13280", "AT5G13420", "AT5G14060", "AT5G14200", "AT5G14590",
"AT5G14800", "AT5G16290", "AT5G17920", "AT5G17990", "AT5G19550",
"AT5G20980", "AT5G22620", "AT5G26780", "AT5G28020", "AT5G28030",
"AT5G35630", "AT5G36160", "AT5G37600", "AT5G38530", "AT5G44520",
"AT5G48220", "AT5G52920", "AT5G53460", "AT5G54810", "AT5G56350",
"AT5G56630", "AT5G57850", "AT5G57890", "AT5G61410", "AT5G61580",
"AT5G63680", "AT5G63890", "AT5G65780", "AT5G66120")
这些名字可以在我要填写的 data.frame 的第二栏中找到。此列的名称为 "Gene.name"。
补充:
第二个向量/代谢途径:
> dput(rnadegrad)
c("AT1G03330", "AT1G21190", "AT1G26230", "AT1G49760", "AT1G54490",
"AT1G55490", "AT1G65700", "AT1G74030", "AT1G75660", "AT2G06990",
"AT2G17510", "AT2G23350", "AT2G25355", "AT2G29560", "AT2G33210",
"AT2G36530", "AT2G43810", "AT2G45810", "AT3G03710", "AT3G13300",
"AT3G13470", "AT3G13860", "AT3G23990", "AT3G52150", "AT3G60500",
"AT3G61240", "AT3G61620", "AT4G00660", "AT4G34110", "AT4G37910",
"AT5G27720", "AT5G35430", "AT5G48870", "AT5G56500")
主表(包含更多行):
> dput(tbl_test)
structure(list(X = c(3L, 19L, 28L, 33L, 34L, 35L, 45L, 66L, 69L,
72L, 79L, 82L, 88L, 89L, 90L, 101L, 103L, 107L, 108L, 114L, 115L,
129L, 137L, 138L, 155L, 169L, 171L, 179L, 186L, 189L, 195L, 200L,
205L, 214L, 216L, 217L, 222L, 224L, 229L, 233L, 234L, 239L, 258L,
261L, 262L, 266L, 271L, 272L, 277L, 279L, 285L, 301L, 306L, 320L,
324L, 327L, 334L, 337L, 341L, 342L, 348L, 351L, 360L, 391L, 397L,
401L, 402L, 411L, 416L, 429L, 436L, 439L, 444L, 450L, 453L, 456L,
457L, 459L, 466L, 472L, 484L, 488L, 493L, 498L, 508L, 515L, 519L,
529L, 540L, 546L, 548L, 555L, 556L, 557L, 575L, 579L, 584L, 585L,
589L, 601L, 604L, 606L, 610L, 620L, 621L, 624L, 625L, 630L, 641L,
643L, 685L, 692L, 710L, 711L, 713L, 717L, 729L, 741L, 748L, 751L,
758L, 770L, 780L, 783L, 787L, 788L, 802L, 806L, 808L, 814L, 817L,
823L, 827L, 831L, 833L, 836L, 845L, 850L, 854L, 861L, 863L, 868L,
871L, 879L, 898L, 899L, 900L, 917L, 918L, 920L, 926L, 937L, 946L,
969L, 972L, 973L, 974L, 975L, 977L, 984L, 995L, 1008L, 1013L,
1014L, 1019L, 1040L, 1045L, 1046L, 1053L, 1057L, 1064L, 1069L,
1070L, 1079L, 1080L, 1081L, 1088L, 1100L, 1109L, 1112L, 1132L,
1136L, 1138L, 1142L, 1146L, 1148L, 1154L, 1158L, 1160L, 1165L,
1168L, 1171L, 1184L, 1187L, 1199L, 1208L, 1213L, 1220L, 1225L
), Pathway = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Gene.name = structure(1:199, .Label = c("AT1G01090",
"AT1G02500", "AT1G03130", "AT1G03330", "AT1G03475", "AT1G03630",
"AT1G04410", "AT1G06410", "AT1G06570", "AT1G06680", "AT1G07370",
"AT1G07780", "AT1G08490", "AT1G08520", "AT1G08550", "AT1G09430",
"AT1G09620", "AT1G09780", "AT1G09795", "AT1G10590", "AT1G10670",
"AT1G11870", "AT1G12230", "AT1G12240", "AT1G13440", "AT1G14610",
"AT1G14810", "AT1G15710", "AT1G16300", "AT1G16460", "AT1G17050",
"AT1G17290", "AT1G17745", "AT1G18500", "AT1G18590", "AT1G18640",
"AT1G19920", "AT1G20020", "AT1G20340", "AT1G20620", "AT1G20630",
"AT1G21190", "AT1G22940", "AT1G23190", "AT1G23310", "AT1G23800",
"AT1G24100", "AT1G24180", "AT1G25220", "AT1G25350", "AT1G26230",
"AT1G27680", "AT1G28350", "AT1G29880", "AT1G30120", "AT1G30510",
"AT1G31180", "AT1G31230", "AT1G31860", "AT1G31910", "AT1G32440",
"AT1G32550", "AT1G34430", "AT1G48030", "AT1G48520", "AT1G48850",
"AT1G48860", "AT1G49760", "AT1G50200", "AT1G51680", "AT1G52340",
"AT1G52400", "AT1G53240", "AT1G53580", "AT1G53830", "AT1G54100",
"AT1G54220", "AT1G54490", "AT1G55490", "AT1G56190", "AT1G58080",
"AT1G58290", "AT1G59900", "AT1G60550", "AT1G62180", "AT1G62660",
"AT1G62800", "AT1G63970", "AT1G65060", "AT1G65700", "AT1G65930",
"AT1G66430", "AT1G66520", "AT1G66530", "AT1G69740", "AT1G70290",
"AT1G70580", "AT1G70730", "AT1G70980", "AT1G71920", "AT1G72330",
"AT1G72550", "AT1G72810", "AT1G74030", "AT1G74040", "AT1G74090",
"AT1G74100", "AT1G74470", "AT1G75330", "AT1G75660", "AT1G79230",
"AT1G79550", "AT1G80560", "AT1G80600", "AT2G01140", "AT2G01290",
"AT2G02500", "AT2G04400", "AT2G04842", "AT2G05710", "AT2G06990",
"AT2G14750", "AT2G17130", "AT2G17265", "AT2G17510", "AT2G17630",
"AT2G19940", "AT2G20420", "AT2G20610", "AT2G21170", "AT2G21330",
"AT2G21590", "AT2G22250", "AT2G22480", "AT2G22780", "AT2G23350",
"AT2G24490", "AT2G25355", "AT2G25840", "AT2G26540", "AT2G26670",
"AT2G26930", "AT2G27150", "AT2G27820", "AT2G29560", "AT2G29630",
"AT2G29690", "AT2G30970", "AT2G31170", "AT2G31390", "AT2G31810",
"AT2G33210", "AT2G34630", "AT2G36230", "AT2G36390", "AT2G36460",
"AT2G36530", "AT2G36580", "AT2G36880", "AT2G37500", "AT2G38700",
"AT2G39800", "AT2G40300", "AT2G40490", "AT2G40840", "AT2G42790",
"AT2G43090", "AT2G43100", "AT2G43750", "AT2G43810", "AT2G44040",
"AT2G44350", "AT2G44490", "AT2G45290", "AT2G45300", "AT2G45440",
"AT2G45810", "AT2G47510", "AT3G01120", "AT3G01440", "AT3G02660",
"AT3G02760", "AT3G02780", "AT3G03250", "AT3G03710", "AT3G03780",
"AT3G04120", "AT3G04520", "AT3G04600", "AT3G04790", "AT3G04870",
"AT3G04940", "AT3G06350", "AT3G06650", "AT3G07630", "AT3G08590",
"AT3G09150", "AT3G09810", "AT3G10050"), class = "factor"), X1_1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1.790289807, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 2.55899547, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.167631809, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1.039532599, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 2.445322589, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0)), .Names = c("X", "Pathway", "Gene.name",
"X1_1"), class = "data.frame", row.names = c(NA, -199L))
期望的输出(仅为手动编辑的示例):
> data_met[,1:2]
Pathway Gene.name
3 aa_metabolism AT1G01090
19 RNA degradation AT1G07780
28 aa_metabolism, RNA degradation AT1G03130
33 0 AT1G14810
34 RNA degradation AT1G26230
35 0 AT1G03630
以此类推……
答案 0 :(得分:3)
可能你需要
# Label every row whose Gene.name occurs in `a` with 'aa_metabolism'.
# NOTE(review): `a` is not defined in the visible text; presumably it is the
# question's gene-ID vector - confirm before running.
data_met$Pathway <- replace(
  data_met$Pathway,
  data_met$Gene.name %in% a,
  'aa_metabolism'
)
在示例数据中,Gene.name 列的元素与向量 a 中的元素不匹配。假设该列中有一些元素出现在 a 中:
# Turn Gene.name into a character vector so IDs can be assigned freely,
# then overwrite rows 2 and 4 with the 2nd and 7th IDs of `a` so that
# the example data contains genes that match `a`.
data_met$Gene.name <- as.character(data_met$Gene.name)
data_met$Gene.name[c(2, 4)] <- as.character(a[c(2, 7)])
与第一个向量 a 进行比较,并像之前一样替换元素:
# Same replacement as before: rows whose Gene.name is in `a` get the
# label 'aa_metabolism'; all other rows keep their current Pathway value.
data_met$Pathway <- replace(
  data_met$Pathway,
  data_met$Gene.name %in% a,
  'aa_metabolism'
)
创建第二个向量a1
# Second gene set: the first six IDs of `a`; row 5 is forced to match it.
a1 <- a[seq_len(6)]
data_met$Gene.name[5] <- as.character(a1[3])

# For genes in `a1`: append 'new_metabolism' (comma-separated) when the row
# already has a label, or set it outright when Pathway is still 0.
# Rows whose gene is not in `a1` keep their Pathway value unchanged.
data_met$Pathway <- with(data_met, {
  in_a1 <- Gene.name %in% a1
  ifelse(
    in_a1,
    ifelse(
      Pathway != 0,
      paste(Pathway, 'new_metabolism', sep = ","),
      'new_metabolism'
    ),
    Pathway
  )
})
对于新数据集,
# Step 1: label every gene listed in `aa` with 'aa_metabolism'.
tbl_test$Pathway <- replace(
  tbl_test$Pathway,
  tbl_test$Gene.name %in% aa,
  'aa_metabolism'
)

# Step 2: for genes listed in `rnadegrad`, append 'RNA_degradation'
# (comma-separated) to an existing label, or set it when Pathway is still 0.
# Rows whose gene is not in `rnadegrad` keep their Pathway value unchanged.
tbl_test$Pathway <- with(tbl_test, {
  in_rnadegrad <- Gene.name %in% rnadegrad
  ifelse(
    in_rnadegrad,
    ifelse(
      Pathway != 0,
      paste(Pathway, 'RNA_degradation', sep = ","),
      'RNA_degradation'
    ),
    Pathway
  )
})