根据与数据框的匹配来过滤大型命名列表

时间:2020-12-19 17:51:04

标签: r list

我不常使用 R 中的列表,所以我相信这个问题应该有一个简单的解决方案。我正在处理一个大型命名列表 (test1),其名称为 KEGG 通路 ID。每个 KEGG 通路 ID (koXXXXX) 对应的元素是该通路中所有基因的 KEGG ID (K#####) 组成的向量。我还有一些重要的基因 (test2) 及其对应的 KEGG ID (test2$kegg_id; K#####)。我想过滤 test1,只保留那些至少包含一个 test2$kegg_id 值的 KEGG 通路。对于保留下来的通路,我想保留 test1 中的全部信息;也就是说,只丢弃那些在 test2$kegg_id 中没有任何匹配 K##### 的通路。

然后我想创建一个仅包含那些 KEGG 通路 ID 的字符向量。

这是数据的一个子集:

dput(test1)
list(`ko00970 Aminoacyl-tRNA biosynthesis` = c("K00604", "K01042", 
"K01866", "K01867", "K01868", "K01869", "K01870", "K01872", "K01873", 
"K01874", "K01875", "K01876", "K01878", "K01879", "K01880", "K01881", 
"K01883", "K01884", "K01885", "K01886", "K01887", "K01889", "K01890", 
"K01892", "K01893", "K02433", "K02434", "K02435", "K03330", "K03341", 
"K03865", "K04566", "K04567", "K06868", "K07587", "K09482", "K09698", 
"K09759", "K10837", "K11627", "K14163", "K14164", "K14218", "K14219", 
"K14220", "K14221", "K14222", "K14223", "K14224", "K14225", "K14226", 
"K14227", "K14228", "K14229", "K14230", "K14231", "K14232", "K14233", 
"K14234", "K14235", "K14236", "K14237", "K14238", "K14239", "K22503", 
"K24278"), `ko02010 ABC transporters` = c("K01995", "K01996", 
"K01997", "K01998", "K01999", "K02000", "K02001", "K02002", "K02006", 
"K02007", "K02008", "K02009", "K02010", "K02011", "K02012", "K02017", 
"K02018", "K02020", "K02036", "K02037", "K02038", "K02040", "K02041", 
"K02042", "K02044", "K02045", "K02046", "K02047", "K02048", "K02062", 
"K02063", "K02064", "K02065", "K02066", "K02067", "K02071", "K02072", 
"K02073", "K02193", "K02194", "K02195", "K02196", "K02424", "K02471", 
"K03523", "K05031", "K05032", "K05033", "K05641", "K05642", "K05643", 
"K05644", "K05645", "K05646", "K05647", "K05648", "K05649", "K05650", 
"K05651", "K05652", "K05653", "K05654", "K05655", "K05656", "K05657", 
"K05658", "K05659", "K05660", "K05661", "K05662", "K05663", "K05664", 
"K05665", "K05666", "K05667", "K05668", "K05669", "K05670", "K05671", 
"K05672", "K05673", "K05674", "K05675", "K05676", "K05677", "K05678", 
"K05679", "K05680", "K05681", "K05682", "K05683", "K05684", "K05685", 
"K05772", "K05773", "K05776", "K05813", "K05814", "K05815", "K05816", 
"K05845", "K05846", "K05847", "K06073", "K06074", "K06159", "K06160", 
"K06161", "K06726", "K06857", "K06858", "K06861", "K07091", "K07122", 
"K07323", "K07335", "K08711", "K08712", "K09688", "K09689", "K09690", 
"K09691", "K09692", "K09693", "K09694", "K09695", "K09696", "K09697", 
"K09808", "K09810", "K09811", "K09812", "K09813", "K09814", "K09815", 
"K09816", "K09817", "K09969", "K09970", "K09971", "K09972", "K09996", 
"K09997", "K09998", "K09999", "K10000", "K10001", "K10002", "K10003", 
"K10004", "K10005", "K10006", "K10007", "K10008", "K10009", "K10010", 
"K10013", "K10014", "K10015", "K10016", "K10017", "K10018", "K10019", 
"K10020", "K10021", "K10022", "K10023", "K10024", "K10025", "K10036", 
"K10037", "K10038", "K10039", "K10040", "K10041", "K10094", "K10107", 
"K10108", "K10109", "K10110", "K10111", "K10112", "K10117", "K10118", 
"K10119", "K10188", "K10189", "K10190", "K10191", "K10192", "K10193", 
"K10194", "K10195", "K10196", "K10197", "K10198", "K10199", "K10200", 
"K10201", "K10202", "K10227", "K10228", "K10229", "K10232", "K10233", 
"K10234", "K10235", "K10236", "K10237", "K10238", "K10240", "K10241", 
"K10242", "K10439", "K10440", "K10441", "K10537", "K10538", "K10539", 
"K10540", "K10541", "K10542", "K10543", "K10544", "K10545", "K10546", 
"K10547", "K10548", "K10549", "K10550", "K10551", "K10552", "K10553", 
"K10554", "K10555", "K10556", "K10557", "K10558", "K10559", "K10560", 
"K10561", "K10562", "K10820", "K10823", "K10824", "K10829", "K10830", 
"K10831", "K11004", "K11050", "K11051", "K11069", "K11070", "K11071", 
"K11072", "K11073", "K11074", "K11075", "K11076", "K11077", "K11078", 
"K11079", "K11080", "K11081", "K11082", "K11083", "K11084", "K11085", 
"K11601", "K11602", "K11603", "K11604", "K11605", "K11606", "K11607", 
"K11631", "K11632", "K11704", "K11705", "K11706", "K11707", "K11708", 
"K11709", "K11710", "K11720", "K11950", "K11951", "K11952", "K11953", 
"K11954", "K11955", "K11956", "K11957", "K11958", "K11959", "K11960", 
"K11961", "K11962", "K11963", "K12292", "K12368", "K12369", "K12370", 
"K12371", "K12372", "K12533", "K12536", "K12539", "K12541", "K13409", 
"K13889", "K13890", "K13891", "K13892", "K13893", "K13894", "K13895", 
"K13896", "K14698", "K14699", "K15495", "K15496", "K15497", "K15551", 
"K15552", "K15553", "K15554", "K15555", "K15556", "K15557", "K15558", 
"K15576", "K15577", "K15578", "K15579", "K15580", "K15581", "K15582", 
"K15583", "K15584", "K15585", "K15586", "K15587", "K15598", "K15599", 
"K15600", "K15628", "K15770", "K15771", "K15772", "K16012", "K16013", 
"K16014", "K16199", "K16200", "K16201", "K16202", "K16299", "K16783", 
"K16784", "K16785", "K16786", "K16787", "K16905", "K16906", "K16907", 
"K16915", "K16916", "K16917", "K16918", "K16919", "K16920", "K16921", 
"K16956", "K16957", "K16958", "K16959", "K16960", "K16961", "K16962", 
"K16963", "K17062", "K17063", "K17073", "K17074", "K17076", "K17077", 
"K17202", "K17203", "K17204", "K17205", "K17206", "K17207", "K17208", 
"K17209", "K17210", "K17213", "K17214", "K17215", "K17234", "K17235", 
"K17236", "K17237", "K17238", "K17239", "K17240", "K17241", "K17242", 
"K17243", "K17244", "K17245", "K17246", "K17311", "K17312", "K17313", 
"K17314", "K17315", "K17316", "K17317", "K17318", "K17319", "K17320", 
"K17321", "K17322", "K17323", "K17324", "K17325", "K17326", "K17327", 
"K17328", "K17329", "K17330", "K17331", "K18104", "K18216", "K18217", 
"K18230", "K18231", "K18232", "K18233", "K18887", "K18888", "K18889", 
"K18890", "K18891", "K18892", "K18893", "K18894", "K18895", "K19079", 
"K19080", "K19083", "K19084", "K19226", "K19227", "K19228", "K19229", 
"K19230", "K19309", "K19310", "K19340", "K19341", "K19349", "K19350", 
"K19971", "K19972", "K19973", "K19975", "K19976", "K20344", "K20386", 
"K20459", "K20460", "K20461", "K20490", "K20491", "K20492", "K20494", 
"K22921", "K22922", "K22923", "K23055", "K23056", "K23057", "K23058", 
"K23059", "K23060", "K23061", "K23062", "K23063", "K23064", "K23125", 
"K23163", "K23181", "K23182", "K23183", "K23184", "K23185", "K23186", 
"K23187", "K23188", "K23227", "K23228", "K23508", "K23509", "K23510", 
"K23511", "K23512", "K23513", "K23535", "K23536", "K23537", "K23545", 
"K23546", "K23547"), `ko02020 Two-component system` = c("K00027", 
"K00066", "K00244", "K00245", "K00246", "K00247", "K00370", "K00371", 
"K00373", "K00374", "K00404", "K00405", "K00406", "K00407", "K00410", 
"K00411", "K00412", "K00413", "K00424", "K00425", "K00426", "K00494", 
"K00575", "K00626", "K00689", "K00692", "K00990", "K01034", "K01035", 
"K01051", "K01077", "K01104", "K01113", "K01179", "K01425", "K01467", 
"K01545", "K01546", "K01547", "K01548", "K01643", "K01644", "K01646", 
"K01791", "K01910", "K01915", "K01991", "K02040", "K02106", "K02252", 
"K02253", "K02259", "K02313", "K02398", "K02402", "K02403", "K02405", 
"K02406", "K02472", "K02488", "K02489", "K02490", "K02491", "K02556", 
"K02584", "K02650", "K02657", "K02658", "K02659", "K02660", "K02661", 
"K02667", "K02668", "K03092", "K03367", "K03400", "K03406", "K03407", 
"K03408", "K03412", "K03413", "K03415", "K03532", "K03533", "K03563", 
"K03620", "K03739", "K03740", "K03776", "K04751", "K04771", "K05338", 
"K05339", "K05597", "K05874", "K05875", "K05876", "K05877", "K05964", 
"K05966", "K06046", "K06080", "K06281", "K06282", "K06347", "K06375", 
"K06596", "K06597", "K06598", "K07165", "K07260", "K07636", "K07637", 
"K07638", "K07639", "K07640", "K07641", "K07642", "K07643", "K07644", 
"K07645", "K07646", "K07647", "K07648", "K07649", "K07650", "K07651", 
"K07652", "K07653", "K07654", "K07655", "K07656", "K07657", "K07658", 
"K07659", "K07660", "K07661", "K07662", "K07663", "K07664", "K07665", 
"K07666", "K07667", "K07668", "K07669", "K07670", "K07671", "K07672", 
"K07673", "K07674", "K07675", "K07676", "K07677", "K07678", "K07679", 
"K07680", "K07681", "K07682", "K07683", "K07684", "K07685", "K07686", 
"K07687", "K07688", "K07689", "K07690", "K07691", "K07692", "K07693", 
"K07694", "K07695", "K07696", "K07697", "K07698", "K07699", "K07700", 
"K07701", "K07702", "K07703", "K07704", "K07705", "K07706", "K07707", 
"K07708", "K07709", "K07710", "K07711", "K07712", "K07713", "K07714", 
"K07715", "K07716", "K07717", "K07718", "K07719", "K07720", "K07768", 
"K07769", "K07770", "K07771", "K07772", "K07773", "K07774", "K07775", 
"K07776", "K07777", "K07778", "K07780", "K07781", "K07782", "K07783", 
"K07784", "K07785", "K07786", "K07787", "K07788", "K07789", "K07790", 
"K07792", "K07793", "K07794", "K07795", "K07796", "K07797", "K07798", 
"K07799", "K07800", "K07801", "K07803", "K07804", "K07805", "K07806", 
"K07810", "K07811", "K07813", "K08082", "K08083", "K08348", "K08349", 
"K08350", "K08357", "K08358", "K08359", "K08372", "K08475", "K08476", 
"K08477", "K08478", "K08479", "K08641", "K08738", "K08926", "K08927", 
"K08928", "K08929", "K08930", "K08939", "K09474", "K09475", "K09476", 
"K09477", "K09696", "K09697", "K10001", "K10002", "K10003", "K10004", 
"K10125", "K10126", "K10255", "K10681", "K10682", "K10697", "K10715", 
"K10850", "K10851", "K10909", "K10910", "K10911", "K10912", "K10913", 
"K10914", "K10916", "K10941", "K10942", "K10943", "K11103", "K11230", 
"K11231", "K11232", "K11233", "K11326", "K11327", "K11328", "K11329", 
"K11330", "K11331", "K11332", "K11354", "K11355", "K11356", "K11357", 
"K11382", "K11383", "K11384", "K11443", "K11444", "K11520", "K11521", 
"K11522", "K11523", "K11524", "K11525", "K11526", "K11601", "K11602", 
"K11603", "K11614", "K11615", "K11616", "K11617", "K11618", "K11619", 
"K11620", "K11621", "K11622", "K11623", "K11624", "K11625", "K11626", 
"K11629", "K11630", "K11631", "K11632", "K11633", "K11634", "K11635", 
"K11636", "K11637", "K11638", "K11639", "K11640", "K11641", "K11688", 
"K11689", "K11690", "K11691", "K11692", "K11711", "K11712", "K12292", 
"K12293", "K12294", "K12295", "K12296", "K12340", "K12415", "K12530", 
"K12531", "K12532", "K13040", "K13041", "K13061", "K13486", "K13487", 
"K13488", "K13489", "K13490", "K13491", "K13532", "K13533", "K13584", 
"K13587", "K13588", "K13589", "K13598", "K13599", "K13815", "K13816", 
"K13924", "K13927", "K13991", "K13994", "K14188", "K14205", "K14978", 
"K14979", "K14980", "K14981", "K14982", "K14983", "K14986", "K14987", 
"K14988", "K14989", "K15011", "K15012", "K15739", "K15841", "K15850", 
"K15851", "K15853", "K15854", "K15859", "K15860", "K15861", "K15862", 
"K16692", "K16712", "K16713", "K17060", "K17061", "K18072", "K18073", 
"K18093", "K18094", "K18095", "K18321", "K18322", "K18323", "K18324", 
"K18326", "K18344", "K18345", "K18346", "K18347", "K18348", "K18349", 
"K18350", "K18351", "K18352", "K18353", "K18354", "K18444", "K18856", 
"K18866", "K18940", "K18941", "K18986", "K18987", "K19077", "K19078", 
"K19079", "K19080", "K19081", "K19082", "K19083", "K19084", "K19609", 
"K19610", "K19611", "K19615", "K19616", "K19617", "K19618", "K19620", 
"K19621", "K19622", "K19624", "K19641", "K19661", "K19666", "K19667", 
"K19668", "K19690", "K19691", "K19692", "K20263", "K20264", "K20339", 
"K20340", "K20482", "K20483", "K20484", "K20485", "K20486", "K20487", 
"K20488", "K20489", "K20490", "K20491", "K20492", "K20494", "K20552", 
"K20973", "K20974", "K20975", "K20976", "K20977", "K20978", "K22501", 
"K23236", "K23514", "K23548", "K23549"), `ko02024 Quorum sensing` = c("K00494", 
"K01114", "K01218", "K01318", "K01364", "K01399", "K01497", "K01580", 
"K01626", "K01635", "K01657", "K01658", "K01728", "K01897", "K01995", 
"K01996", "K01997", "K01998", "K01999", "K02031", "K02032", "K02033", 
"K02034", "K02035", "K02052", "K02053", "K02054", "K02055", "K02250", 
"K02251", "K02252", "K02253", "K02402", "K02403", "K02490", "K03070", 
"K03071", "K03073", "K03075", "K03076", "K03106", "K03110", "K03210", 
"K03217", "K03400", "K03666", "K06046", "K06352", "K06353", "K06354", 
"K06355", "K06356", "K06358", "K06359", "K06360", "K06361", "K06363", 
"K06364", "K06365", "K06366", "K06369", "K06375", "K06998", "K07173", 
"K07344", "K07645", "K07666", "K07667", "K07680", "K07691", "K07692", 
"K07699", "K07706", "K07707", "K07711", "K07715", "K07781", "K07782", 
"K07800", "K07813", "K08321", "K08605", "K08642", "K08777", "K09823", 
"K09936", "K10555", "K10556", "K10557", "K10558", "K10715", "K10823", 
"K10909", "K10910", "K10911", "K10912", "K10913", "K10914", "K10915", 
"K10916", "K10917", "K11006", "K11007", "K11031", "K11033", "K11034", 
"K11035", "K11036", "K11037", "K11039", "K11063", "K11216", "K11530", 
"K11531", "K11752", "K12257", "K12292", "K12293", "K12294", "K12295", 
"K12296", "K12415", "K12789", "K12990", "K13060", "K13061", "K13062", 
"K13063", "K13075", "K13815", "K13816", "K14051", "K14645", "K14982", 
"K14983", "K15580", "K15581", "K15582", "K15583", "K15654", "K15655", 
"K15656", "K15657", "K15850", "K15851", "K15852", "K15853", "K15854", 
"K16619", "K17940", "K18000", "K18001", "K18002", "K18003", "K18096", 
"K18098", "K18099", "K18100", "K18101", "K18139", "K18304", "K18306", 
"K18307", "K18315", "K18316", "K18317", "K18318", "K18319", "K19666", 
"K19731", "K19732", "K19733", "K19734", "K19735", "K20086", "K20087", 
"K20088", "K20089", "K20090", "K20248", "K20249", "K20250", "K20252", 
"K20253", "K20256", "K20257", "K20258", "K20259", "K20260", "K20261", 
"K20262", "K20263", "K20264", "K20265", "K20266", "K20267", "K20268", 
"K20269", "K20270", "K20271", "K20272", "K20273", "K20274", "K20275", 
"K20276", "K20277", "K20321", "K20322", "K20323", "K20324", "K20325", 
"K20326", "K20327", "K20328", "K20329", "K20330", "K20331", "K20332", 
"K20333", "K20334", "K20335", "K20336", "K20337", "K20338", "K20339", 
"K20340", "K20341", "K20342", "K20343", "K20344", "K20345", "K20373", 
"K20374", "K20375", "K20376", "K20377", "K20378", "K20379", "K20380", 
"K20381", "K20382", "K20383", "K20384", "K20385", "K20386", "K20387", 
"K20388", "K20389", "K20390", "K20391", "K20480", "K20481", "K20482", 
"K20483", "K20484", "K20485", "K20486", "K20487", "K20488", "K20489", 
"K20490", "K20491", "K20492", "K20494", "K20527", "K20528", "K20529", 
"K20530", "K20531", "K20532", "K20533", "K20539", "K20540", "K20552", 
"K20554", "K20555", "K22954", "K22955", "K22956", "K22957", "K22968", 
"K23133"), `ko02025 Biofilm formation - Pseudomonas aeruginosa` = c("K01657", 
"K01658", "K01768", "K02398", "K02405", "K02657", "K02658", "K02659", 
"K02660", "K03563", "K03651", "K06596", "K06598", "K07678", "K07689", 
"K10914", "K10941", "K11444", "K11890", "K11891", "K11893", "K11895", 
"K11900", "K11901", "K11902", "K11903", "K11907", "K11912", "K11913", 
"K11915", "K12990", "K12992", "K13060", "K13061", "K13487", "K13488", 
"K13489", "K13490", "K13491", "K16011", "K17940", "K18000", "K18001", 
"K18002", "K18003", "K18099", "K18100", "K18101", "K18304", "K19291", 
"K19735", "K20257", "K20258", "K20259", "K20968", "K20969", "K20970", 
"K20971", "K20972", "K20973", "K20974", "K20975", "K20976", "K20977", 
"K20978", "K20987", "K20997", "K20998", "K20999", "K21000", "K21001", 
"K21002", "K21003", "K21004", "K21005", "K21006", "K21007", "K21008", 
"K21009", "K21010", "K21011", "K21012", "K21019", "K21020", "K21021", 
"K21022", "K21023", "K21024", "K21025", "K23127"), `ko02026 Biofilm formation - Escherichia coli` = c("K00688", 
"K00694", "K00703", "K00975", "K01991", "K02398", "K02402", "K02403", 
"K02405", "K02425", "K02777", "K03087", "K03563", "K03566", "K03567", 
"K04333", "K04334", "K04335", "K04336", "K04761", "K05851", "K06204", 
"K07173", "K07638", "K07648", "K07659", "K07676", "K07677", "K07678", 
"K07687", "K07689", "K07773", "K07781", "K07782", "K10914", "K11531", 
"K11931", "K11935", "K11936", "K11937", "K12687", "K14051", "K18502", 
"K18504", "K18509", "K18515", "K18516", "K18518", "K18521", "K18522", 
"K18523", "K18528", "K18968", "K21084", "K21085", "K21086", "K21087", 
"K21088", "K21089", "K21090", "K21091"))

以及一个包含感兴趣基因的数据框(已截断):

dput(test2)
structure(list(gene_id = c("G6381", "G12285", "G10911", "G17366", 
"G3593", "G17753"), kegg_id = c("K18523", "K19009", "K07782", 
"K02398", "K21407", "K00922")), row.names = c(NA, 6L), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

如果我们需要获取相应的 'gene_id',可以先从 'test2' 创建一个命名向量 (vector),然后遍历 list ('test1'),将其中的 'kegg_id' 与该命名向量的名称进行匹配以提取 'gene_id',并使用 na.omit 删除不匹配(缺失)的元素:

# Named lookup vector: the names are the KEGG orthology IDs (kegg_id) and the
# values are the corresponding gene IDs taken from test2.
nm1 <- with(test2, setNames(gene_id, kegg_id))
# For each pathway in test1, collect the gene IDs whose kegg_id is a member of
# that pathway. Selecting by name membership (rather than nm1[x]) keeps every
# gene when several genes share one kegg_id; character indexing would return
# only the first element for a duplicated name.
lst1 <- lapply(test1, function(x) {
  hits <- nm1[names(nm1) %in% x]
  # Report genes in the order in which their kegg_id appears in the pathway.
  hits <- hits[order(match(names(hits), x))]
  # Drop missing gene IDs and strip names/attributes; a pathway without any
  # match yields a zero-length vector.
  as.vector(na.omit(hits))
})

如果需要过滤原始的 list ('test1'),只保留至少有一个匹配的通路(对结果调用 names() 即可得到这些通路名称的字符向量):

# Keep the original pathways (with their complete K-number vectors) for which
# at least one gene ID was extracted into lst1.
test1[vapply(lst1, function(genes) length(genes) > 0, logical(1))]

或者对提取结果的 list ('lst1') 取子集,只保留非空的元素:

# Keep only the pathways whose extracted gene-ID vector is non-empty.
Filter(function(genes) length(genes) > 0, lst1)