我有一个数据框,将每个基因座链接到一个途径。 我想制作一个关联矩阵,其中每一行代表一条路径,每列都是轨迹。如果该路径位于该路径中,则为1,否则为0.
我写了一些东西试试这样做,但这花了太长时间。我有大约6种不同的途径集,每种途径有数百种途径,我有近6000个基因座名称。
我正在使用复杂的双重保护结构。 任何人都可以建议一种更有效的方法吗?
这是我尝试的代码,这花了太长时间。
PA14_BioCycAllPwy_binVecs<-sapply(df$pwy_id,function(pwy) {
sapply(PA14_annot_UCSD$PA14_locus,function(loc) ifelse(nrow(subset(df,df$PA14_locus==locus && df$pwy_id==pwy))>0,1,0))
})
这是一个示例locus2pathway数据帧。
> dput(df[1:100,])
structure(list(PA14_locus = c("PA14_57720", "PA14_57710", "PA14_21370",
"PA14_21340", "PA14_23560", "PA14_58190", "PA14_58180", "PA14_58170",
"PA14_61760", "PA14_66750", "PA14_41360", "PA14_41380", "PA14_23560",
"PA14_60370", "PA14_12230", "PA14_14440", "PA14_52600", "PA14_51820",
"PA14_08560", "PA14_10420", "PA14_57670", "PA14_28650", "PA14_30330",
"PA14_51900", "PA14_28710", "PA14_28690", "PA14_19050", "PA14_16530",
"PA14_14890", "PA14_00100", "PA14_00090", "PA14_08660", "PA14_27610",
"PA14_27620", "PA14_30680", "PA14_54840", "PA14_41330", "PA14_51660",
"PA14_60150", "PA14_07470", "PA14_52320", "PA14_61830", "PA14_62790",
"PA14_68030", "PA14_24870", "PA14_28740", "PA14_60160", "PA14_30420",
"PA14_51230", "PA14_52560", "PA14_27190", "PA14_08670", "PA14_58710",
"PA14_68150", "PA14_08690", "PA14_08650", "PA14_41200", "PA14_21810",
"PA14_24130", "PA14_24120", "PA14_23490", "PA14_60180", "PA14_08590",
"PA14_23570", "PA14_55634", "PA14_62070", "PA14_70890", "PA14_21800",
"PA14_24110", "PA14_28190", "PA14_14570", "PA14_41320", "PA14_62800",
"PA14_65210", "PA14_65220", "PA14_08580", "PA14_55635", "PA14_62080",
"PA14_70900", "PA14_23580", "PA14_27600", "PA14_61760", "PA14_30720",
"PA14_03410", "PA14_20500", "PA14_41340", "PA14_52540", "PA14_52550",
"PA14_60620", "PA14_10850", "PA14_33480", "PA14_36590", "PA14_70270",
"PA14_36630", "PA14_36840", "PA14_22930", "PA14_68210", "PA14_68190",
"PA14_68170", "PA14_68200"), pwy_id = c("PWY-5340", "PWY-5340",
"PWY-5143", "PWY-5143", "PWY-5921", "PWY-5921", "PWY-5921", "PWY-5921",
"PWY-5921", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY",
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "PWY-5782", "PWY-5782",
"GLYCOCAT-PWY", "GLYCOCAT-PWY", "GLYCOCAT-PWY", "GLYCOCAT-PWY",
"GLYCOCAT-PWY", "DTDPRHAMSYN-PWY", "DTDPRHAMSYN-PWY", "DTDPRHAMSYN-PWY",
"DTDPRHAMSYN-PWY")), .Names = c("PA14_locus", "pwy_id"), row.names = c(NA,
100L), class = "data.frame")
以下是包含所有基因座名称的列表示例。
> head(PA14_annot_UCSD$PA14_locus)
[1] PA14_28410 PA14_42500 PA14_31260 PA14_28280 PA14_68610 PA14_72540
4434 Levels: EMPTY PA14_00020 PA14_00030 PA14_00070 PA14_00080 PA14_00120 PA14_00150 ... PA14_73410