R - 如何创建关联矩阵

时间:2014-11-26 13:15:22

标签: r

我有一个数据框,将每个基因座链接到一个途径。 我想制作一个关联矩阵,其中每一行代表一条路径,每列都是轨迹。如果该路径位于该路径中,则为1,否则为0.

我写了一些东西试试这样做,但这花了太长时间。我有大约6种不同的途径集,每种途径有数百种途径,我有近6000个基因座名称。

我正在使用复杂的双重保护结构。 任何人都可以建议一种更有效的方法吗?

这是我尝试的代码,这花了太长时间。

PA14_BioCycAllPwy_binVecs<-sapply(df$pwy_id,function(pwy) {
sapply(PA14_annot_UCSD$PA14_locus,function(loc) ifelse(nrow(subset(df,df$PA14_locus==locus && df$pwy_id==pwy))>0,1,0))
})

这是一个示例locus2pathway数据帧。

> dput(df[1:100,])
structure(list(PA14_locus = c("PA14_57720", "PA14_57710", "PA14_21370", 
"PA14_21340", "PA14_23560", "PA14_58190", "PA14_58180", "PA14_58170", 
"PA14_61760", "PA14_66750", "PA14_41360", "PA14_41380", "PA14_23560", 
"PA14_60370", "PA14_12230", "PA14_14440", "PA14_52600", "PA14_51820", 
"PA14_08560", "PA14_10420", "PA14_57670", "PA14_28650", "PA14_30330", 
"PA14_51900", "PA14_28710", "PA14_28690", "PA14_19050", "PA14_16530", 
"PA14_14890", "PA14_00100", "PA14_00090", "PA14_08660", "PA14_27610", 
"PA14_27620", "PA14_30680", "PA14_54840", "PA14_41330", "PA14_51660", 
"PA14_60150", "PA14_07470", "PA14_52320", "PA14_61830", "PA14_62790", 
"PA14_68030", "PA14_24870", "PA14_28740", "PA14_60160", "PA14_30420", 
"PA14_51230", "PA14_52560", "PA14_27190", "PA14_08670", "PA14_58710", 
"PA14_68150", "PA14_08690", "PA14_08650", "PA14_41200", "PA14_21810", 
"PA14_24130", "PA14_24120", "PA14_23490", "PA14_60180", "PA14_08590", 
"PA14_23570", "PA14_55634", "PA14_62070", "PA14_70890", "PA14_21800", 
"PA14_24110", "PA14_28190", "PA14_14570", "PA14_41320", "PA14_62800", 
"PA14_65210", "PA14_65220", "PA14_08580", "PA14_55635", "PA14_62080", 
"PA14_70900", "PA14_23580", "PA14_27600", "PA14_61760", "PA14_30720", 
"PA14_03410", "PA14_20500", "PA14_41340", "PA14_52540", "PA14_52550", 
"PA14_60620", "PA14_10850", "PA14_33480", "PA14_36590", "PA14_70270", 
"PA14_36630", "PA14_36840", "PA14_22930", "PA14_68210", "PA14_68190", 
"PA14_68170", "PA14_68200"), pwy_id = c("PWY-5340", "PWY-5340", 
"PWY-5143", "PWY-5143", "PWY-5921", "PWY-5921", "PWY-5921", "PWY-5921", 
"PWY-5921", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", 
"TRNA-CHARGING-PWY", "TRNA-CHARGING-PWY", "PWY-5782", "PWY-5782", 
"GLYCOCAT-PWY", "GLYCOCAT-PWY", "GLYCOCAT-PWY", "GLYCOCAT-PWY", 
"GLYCOCAT-PWY", "DTDPRHAMSYN-PWY", "DTDPRHAMSYN-PWY", "DTDPRHAMSYN-PWY", 
"DTDPRHAMSYN-PWY")), .Names = c("PA14_locus", "pwy_id"), row.names = c(NA, 
100L), class = "data.frame")

以下是包含所有基因座名称的列表示例。

> head(PA14_annot_UCSD$PA14_locus)
[1] PA14_28410 PA14_42500 PA14_31260 PA14_28280 PA14_68610 PA14_72540
4434 Levels: EMPTY PA14_00020 PA14_00030 PA14_00070 PA14_00080 PA14_00120 PA14_00150 ... PA14_73410

0 个答案:

没有答案