我有一个由蛋白质组成的清单:
>head(PPI)
$A1CF
[1] "SYNCRIP" "KHSRP"
$A2LD1
[1] "PRPSAP2" "RPL15"
$A2M
[1] "MMP2" "NGF" "IL10" "CELA1" "KLK3" "C11orf58" "LCAT" "IL1B" "KLK13" "ANXA6" "SERPINA1" "TGFBI" "KLK5" "LRP1"
[15] "PDGFA" "ADAMTS1" "KLK2" "KLKB1" "F2" "CPB2" "MYOC" "PLG" "CTSE" "ADAM19" "SHBG" "PAEP" "HSPA5" "APOE"
[29] "SPACA3" "APP" "PDGFB"
$AAAS
[1] "EP300" "NUP214" "NUP133" "NUP37" "NUP35" "RANBP2" "NUP210"
$AAGAB
[1] "EIF3C" "UNC119" "AFTPH"
$AAK1
[1] "PRKAA1" "SPEG" "JAK1" "KIAA0195" "AURKB" "MAPK6" "FER" "PDE4A" "ALPK3" "HIPK1" "MAP4K5" "LSM14A" "TBKBP1" "FRYL"
[15] "SIK2" "PKN3" "ACOX3" "MAP4K2" "TAOK1" "SIK3" "AZI2" "TESK2" "TBK1" "KIAA0528" "PTPN18" "PIP4K2C" "CAMK2G" "CABC1"
[29] "NEK11"
和我要解析的第二个列表(也包含蛋白质名称):
>head(mylist)
$cluster.1
[1] "HSP90AB1" "INMT" "CKB" "NR2E1" "ME3" "FAM162A" "KIRREL2"
$cluster.2
[1] "ENSG00000212860" "TRADD" "C1QBP" "KIAA1967" "ENSG00000137379" "MAP3K3" "TNFRSF1B" "BAG2"
[9] "ENSG00000212866" "RIPK3" "EPRS" "HSPA6" "HSPA5" "IKBKG" "TBK1" "TRAF2"
[17] "MAP3K7" "NFKB1" "MAP3K14" "HSPA1A" "MAP3K7IP2" "HSPBP1" "NFKB2" "DNAJA1"
[25] "TNFRSF1A" "TRAF3IP2" "NFKBIA" "HSPA9" "ENSG00000183311" "TUBB" "TUBA3D" "TANK"
[33] "ENSG00000215292" "REL" "MAP3K1" "HSPA1B" "HSPA8" "NFKBIB" "PGAM5" "EEF1A2"
[41] "MAP3K8" "CLTC" "RCN2" "MAP3K7IP1" "RARS" "TRAF1" "TUBA3C" "HSPA1L"
[49] "MYO1D" "NOD1" "HSP90AA2" "CAD" "RELB" "AIFM1" "TUBB2B" "RIPK2"
[57] "CDC37" "IKBKB" "ERLIN1" "RIPK1" "TNIP2" "STUB1" "TUBB4" "HSPA2"
[65] "CHUK" "DNAJC3" "CCDC50" "SLC25A5" "NFKBIE" "AK3" "TICAM1" "TIMM50"
[73] "ANKRD17" "OTUD7B" "TNFAIP3" "RPS27L" "TRPC4AP" "TUBB6" "DNAJC6" "PXMP2"
[81] "FLJ25006"
$cluster.3
[1] "ACTB" "PFN1" "XPO6" "VASP" "ZYX" "PFN2" "DIAPH1" "APBB1IP" "DIAPH2" "PARVG" "ENAH" "PCYT1B" "PFN4" "CNN2" "NSMAF" "PFN3"
[17] "LMOD1"
$cluster.4
[1] "UBB" "HERC3" "KLRK1" "ULBP1" "RAET1E" "MICA" "HCST" "ENSG00000184444"
[9] "ENSG00000206449" "ULBP2" "ZNF385A" "ULBP3" "RAET1G"
$cluster.5
[1] "YWHAZ" "SLAIN2" "ZC3H13" "C12orf51" "PGLYRP1" "ATL3"
$cluster.6
[1] "ACTG1" "EPS8L3" "PARVG" "TMSB4Y" "B3GALT1" "UGT1A6"
我想利用PPI中包含的信息,计算第二个列表mylist中每个元素(集群)的每个成员与同一集群其他成员发生相互作用的次数。我目前采用的方法如下:
# Restrict the interaction list to the members of the first cluster.
PPI_sub <- PPI[mylist[[1]]]
# For every cluster member z, build a data.frame with one row per cluster
# member: `count` is how many times z occurs in that member's partner list.
# Names are compared exactly (`%in%`) rather than as regex patterns, so a
# name such as "TUBB" is not counted inside "TUBB2B" or "TUBB4".
c1.share <- lapply(mylist[[1]], function(z) {
  data.frame(
    lineNum = seq_along(PPI_sub),
    count = vapply(PPI_sub, function(x) sum(x %in% z), integer(1))
  )
})
names(c1.share) <- mylist[[1]]
c1.share
$HSP90AB1
lineNum count
HSP90AB1 1 1
INMT 2 0
CKB 3 0
NR2E1 4 1
ME3 5 0
FAM162A 6 0
KIRREL2 7 0
$INMT
lineNum count
HSP90AB1 1 1
INMT 2 0
CKB 3 0
NR2E1 4 0
ME3 5 0
FAM162A 6 0
KIRREL2 7 1
$CKB
lineNum count
HSP90AB1 1 1
INMT 2 0
CKB 3 0
NR2E1 4 0
ME3 5 1
FAM162A 6 0
KIRREL2 7 0
$NR2E1
lineNum count
HSP90AB1 1 0
INMT 2 0
CKB 3 0
NR2E1 4 0
ME3 5 0
FAM162A 6 0
KIRREL2 7 0
$ME3
lineNum count
HSP90AB1 1 0
INMT 2 1
CKB 3 0
NR2E1 4 0
ME3 5 0
FAM162A 6 0
KIRREL2 7 0
$FAM162A
lineNum count
HSP90AB1 1 1
INMT 2 0
CKB 3 0
NR2E1 4 0
ME3 5 0
FAM162A 6 0
KIRREL2 7 0
$KIRREL2
lineNum count
HSP90AB1 1 0
INMT 2 0
CKB 3 0
NR2E1 4 0
ME3 5 0
FAM162A 6 0
KIRREL2 7 0
现在,如果我统计集群中每个成员对应的1的个数,再加上它在其余成员的结果中出现的次数,就能得到我想要的数字:
HSP90AB1 5
INMT 3
CKB 2
NR2E1 1
ME3 2
FAM162A 1
KIRREL2 1
我的问题是,我不知道如何自动得到这些最终值,并对mylist的其余元素执行同样的操作。
P.S. 这是PPI中与cluster.1的成员对应的部分(即PPI_sub):
$HSP90AB1
[1] "CKB" "PDHA1" "ENTPD6" "FAM162A" "INMT" "BOLA2" "MVP" "HSP90AB1"
$INMT
[1] "COX4I1" "ME3" "THUMPD1" "KLHL8" "COX4I2"
$CKB
[1] "THUMPD1"
$NR2E1
[1] "GSPT1" "GSPT2" "KPNA3" "HSP90AB1" "EIF2B1" "TLE4"
$ME3
[1] "SFRS18" "CKB" "CKM"
$FAM162A
[1] "HSP90AA2"
$KIRREL2
[1] "INMT"
非常感谢
答案 0 :(得分:1)
您可以使用Reduce
# Element-wise sum of all per-member data frames; the list is named
# `c1.share` (with a dot), so that is the object to reduce.
Reduce(`+`, c1.share)
要推广这段代码并不难,只需将mylist[[1]]替换为一个变量,并把代码放进函数中。
# Count, for each member of one cluster, how often cluster members appear in
# its PPI partner list, summed over the whole cluster.
#
# mylistsub: character vector of protein names (one element of `mylist`).
# Reads the global list `PPI` (indexed by protein name; each entry is a
# character vector of partner names).
# Returns a data.frame with one row per entry of `mylistsub`. `count` is the
# number of occurrences of any cluster member in that row's partner list.
# Because Reduce(`+`) adds every column, `lineNum` is summed as well.
f <- function(mylistsub) {
  PPI_sub <- PPI[mylistsub]
  # One data.frame per cluster member z. Names are matched exactly (`%in%`),
  # not as regex patterns, so "TUBB" is not counted inside "TUBB2B".
  c1.share <- lapply(mylistsub, function(z) {
    data.frame(
      lineNum = seq_along(PPI_sub),
      count = vapply(PPI_sub, function(x) sum(x %in% z), integer(1))
    )
  })
  names(c1.share) <- mylistsub
  Reduce(`+`, c1.share)
}
# Apply f to every cluster in mylist; the result is a list with one entry
# per cluster.
lapply(X = mylist, FUN = f)