我正在尝试运行 ddply 来计算第1列和第2列中每个元素的出现次数,所以我使用了两次 ddply:
1)当我第一次运行操作时,它会创建我期望的值
# Attach the per-peptide row count as PSMPerPep: ddply() splits xx by
# Annotated.Sequence and transform() adds the size of each group to its rows.
xx <- ddply(
  .data = xx,
  .variables = "Annotated.Sequence",
  .fun = transform,
  PSMPerPep = length(Annotated.Sequence)
)
2)然而,当我在不同的列上第二次运行相同的操作时,不仅会创建一个包含错误数字的列,还会更改第一列中的数字。有什么建议?
# Attach the per-protein row count as PSMPerProt, grouping on
# Protein.Accessions this time.
xx <- ddply(
  .data = xx,
  .variables = "Protein.Accessions",
  .fun = transform,
  PSMPerProt = length(Protein.Accessions)
)
xx <- structure(list(Annotated.Sequence = c("ACDLPAWVHFPDTER", "ACIGDTLCQK",
"ACIGDTLCQK", "ADEEFFAK", "AENPGISFGQVGK", "AETLYEGPSDDPFCTAIR",
"AETLYEGPSDDPFCTAIR", "AFDDESVQK", "AILGATNPLQSAPGTIR", "AILGATNPLQSAPGTIR",
"AIVNATTHYNDPVK", "AKGDNDPLDVCEIGEK", "ALALLEDEER", "ALALLEDEER",
"ALALLEDEER", "ALALLEDEER", "APVGNPEGADKPNKK", "APVVQQPAPSFK",
"AQPPEAGPQGLHDLGR", "AQPPEAGPQGLHDLGR", "AQPPEAGPQGLHDLGR", "ASTGEEYTAETDPGVQSVK",
"ASTSTSAPASTPSPSSK", "ATGTTIVTDTGEFEQIAK", "AVANVNDIIAPALIK",
"AVANVNDIIAPALIK", "AVANVNDIIAPALIK", "AVANVNDIIAPALIK", "AVASSGQELSVEER",
"AYLAENDPDSVEAFEK", "AYLAENDPDSVEAFEK", "CLANLRPLLDSGTMGTK",
"CLANLRPLLDSGTMGTK", "DDDHNGHIDFITAASNLR", "DDGYGGGYGGGRPDDR",
"DDLFNTNASIVR", "DFNTGSANAAADAGGEDIPDLVDQKFDDVE", "DFPQLDSQLPK",
"DLFDYAQEK", "DLFDYAQEK", "DLQELIAEGNTK", "DNDSYGSSNRR", "DNDSYGSSNRR",
"DQANDGLSSALLILYLDSAR", "DQANDGLSSALLILYLDSAR", "DQANDGLSSALLILYLDSAR",
"DVGADGQPTEELK", "DVYEDELVPVFEAVGR", "DVYEDELVPVFEAVGR", "DVYEDELVPVFEAVGR",
"DVYEDELVPVFEAVGR", "DVYEDELVPVFEAVGR", "EADTNNDGEIDIQEFTSLLAAK",
"EADTNNDGEIDIQEFTSLLAAK", "EDFTLLDFINAVK", "EDFTLLDFINAVK", "EEEQEEEEDAEK",
"EFILEQHNK", "EGETETEGAATATAAATEAK", "EGTIPTDYEQATGLQR", "EGTIPTDYEQATGLQR",
"EIFDNVNSEFNVALK", "EIFISNITQANPGIVTCLENHPHK", "ELIEPAEGEEVDEDAEPQYR",
"ELIEPAEGEEVDEDAEPQYR", "ELNNYEIRPGR", "ELNNYEIRPGR", "EPTPSIASDISLPIATQELR",
"EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR",
"EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR",
"ERLLDEWFTLDEVPK", "ERPPDHQHSAQVK", "ESDEFIAEK", "ESNPADGRENLQSIEDR",
"ETIEPAVR", "FDLNEPLHLSFLQNAAK", "FGGGRPDDR", "FRQELTSLADVYINDAFGTAHR",
"FVSEVAGTNPVNENVPVVGGHSGVTIVPLLSQTK", "FWQTYSSAEEVLQK", "GANTHLSTFSFTK",
"GAPGVAADVSHVPTNSTVK", "GAPGVAADVSHVPTNSTVK", "GATYGKPTNQGVNQLK",
"GDNDPLDVCEIGEK", "GEGEGGELPGVTPYPNENELIK", "GEVTASGDDLVIDGHK",
"GEVTASGDDLVIDGHK", "GFDSAEGLQTSGLHVQGQK", "GGDVSSTTYDA", "GGNDYEIYNDPR",
"GGVIMDVVNADQAK", "GIFGYGYETPSAIQQR", "GISELGIYPAVDPLDSK", "GMITVTDPDLIEK",
"GNPTVEVDFTTDK", "GNPTVEVDFTTDK", "GSGHSNTVR", "GTDEANGATEFDR",
"GTEVNDTGAPISVPVGR", "GTEVNDTGAPISVPVGR", "GTQFGLQTPGSR", "GTQFGLQTPGSR",
"GWDISLTNNYGK", "GWTQWYDLTEDGTRPQAMT", "GWTQWYDLTEDGTRPQAMT",
"HEGGFGGGRPDDR", "HIANISNAK", "HNDDEQYVWESNAGGK", "HSEFVAYPIQLVVTK",
"HVVFGEVTDGLDIVK", "HVVFGEVTDGLDIVK", "HVVFGEVTDGLDIVK", "IADSGLTALSYTQELRPGVK",
"IADSGLTALSYTQELRPGVK", "IDEFLLSLDGTPNK", "IDEFLLSLDGTPNK", "IEEELGSEAIYAGK",
"IEEELGSEAIYAGK", "IEEELGSEAIYAGK", "IESFGSGSGATSK", "IHFIEAQDLQGK",
"IIPAIATTTATVSGLVALEMIK", "IIAAVPNASDVAVCSSR", "ILENSEGGR", "IPRDVYEDELVPVFEAVGR",
"IQEFKPSNK", "IQIVGDDLTVTNPTR", "ISPSDQSSTVISASWDK", "ISPSDQSSTVISASWDK",
"ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK",
"ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", "IVDMSTSK", "IVIEESGQL",
"IVSDWSNIVVAYEPVWAIGTGLAATPEDAEETHK", "IVTGVNPQSAVK", "IWCFGPDGNGPNLVVDQTK",
"KAEDEEEDEGEIDETGLDPK", "KIESCGTSSGTPSASVVIEESGEAEK", "KIESFGSGSGATSK",
"KLEDLSPSTHNMEVPNVSR", "KLQINLVVEDALVSLDDLQAAVEEDEDHVQSTDIAAMQK",
"KPNVGCQQDSEELLK", "KPNVGCQQDSEELLK", "KPNVGCQQDSEELLK", "KPNVGCQQDSEELLK",
"KPNVGCQQDSEELLK", "KPTATTETCAVAAVSAAYEQDAK", "KREEILEEIAK",
"KYDVVVIGGGPGGYVAAIK", "LADYLINVGY", "LEWLTLMPNASNLDK", "LEWLTLMPNASNLDK",
"LFCDFGDEFEVLDTTGEEPK", "LFCDFGDEFEVLDTTGEEPK", "LFCDFGDEFEVLDTTGEEPK",
"LFCDFGDEFEVLDTTGEEPK", "LGANAILGVSLAAANAAAAAQGIPLYK", "LGIHEDAQNR",
"LISWYDNEYGYSTR", "LLGVCCSVDNCR", "LLYGHLDDPHNQEIER", "LLYNDYVSNPSK",
"LQSENFTYEIVK", "LRDQAINNAQR", "LSHVSTGGGASLELLEGK", "LTGGEDNQYGIPK",
"LVEALCNEPEEK", "LVEDPQIVAPFMDK", "LWDLETGETTQR", "LWDLETGETTQR",
"MGHAGAIVAGGK", "MLIFEDVISGDELLSDAYDVK", "MLIFEDVISGDELLSDAYDVK",
"MPIGDSLFDEAGAK", "MQLVQESEEK", "MQLVQESEEK", "MQLVQESEEK", "NCFLNLAIPIVVFTETTEVRKTK",
"NDREFNGIIAQTTNDNITEAGK", "NFALLGVGTSK", "NGDQDLVLEVAQHLGENTVR",
"NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR",
"NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NQAALNPK",
"NVPGVETASVK", "NWSQCVELAR", "QAFDDAVADLETLSEDSYK", "QATFPGVQMK",
"QATFPGVQMK", "QDVIITALDNVEAR", "QDVIITALDNVEAR", "QDVIITALDNVEAR",
"QEEEEEEK", "QGDNEIEGLTDTTVPK", "QLENGTTLGQSPLGQIQLTIR", "RAGELTQEELER",
"REAQLCVLCDSVTEESIIK", "RERPPDHQHSAQVK", "RERPPDHQHSAQVK", "RERPPDHQHSAQVK",
"RISTVGELNDLFADK", "RISTVGELNDLFADK", "RQENLAK", "RQGTSPDTMR",
"SEPLPTEEEK", "SFGQFNPGCVER", "SFGQFNPGCVER", "SFGQFNPGCVER",
"SFGQFNPGCVER", "SFGQFNPGCVER", "SGETEDTFIADLSVGLR", "SGLAEGYSYTDANK",
"SGLAEGYSYTDANK", "SGQAAFGNMCR", "SGYTLPSNIISNTDVTR", "SHMSGSPGPGGSNTAPSTPVIGGSDKPGMEEK",
"SIDDSVAQIIG", "SINPNYTPVPVPETK", "SIVPSGASTGVHEALELR", "SLQDIIAILGMDELSEADK",
"SNETGILDAIK", "SSSSLLASPGHISVK", "STGDDNEVAEEEEADVEFTPVVQLDK",
"STAAEELANTFGYK", "SVEQIDDCPAGNIIGLVGIDQFLLK", "SYTAADATLK",
"SAAGTYVVFGEAK", "TAEDVIAAFECN", "TASGNIIPSSTGAAK", "TASGNIIPSSTGAAK",
"TASGNIIPSSTGAAK", "TCNVLVAIEQQSPDIAQGLHYEK", "TCYNCGK", "TGQFGWSANMER",
"TGTPLFSSHMLDLSEETDDENIATCAK", "TGYSMVQENGQR", "TGYSMVQENGQR",
"THGPQIK", "TKQTILIAHYPSGVQPGEATTLVEK", "TLNPVFDQSFDFSVSLPEVQR",
"TLNPVFDQSFDFSVSLPEVQRR", "TLNPVFDQSFDFSVSLPEVQRR", "TLTTVQGVPNEYDLK",
"TLTTVQGVPNEYDLKK", "TVEDDHPIPEDVHENYQNTVAEFASR", "VATLYDMIDHQDATNLDDK",
"VCPTTETIYNDEFYTK", "VCPTTETIYNDEFYTK", "VCPTTETIYNDEFYTK", "VDFNVPLDGK",
"VDVGQQPLR", "VEEPLGSYAPNTIDKPFYER", "VEQEAEQQIHK", "VHADQTPEDLDMDDGDTIEAHR",
"VHLVAIDIFTGK", "VIITAPSADAPMFVVGVNEDK", "VIITAPSADAPMFVVGVNEDK",
"VITSSAR", "VNLDTDCQYAYLTGIR", "VNLDTDCQYAYLTGIRDYVTNK", "VNSAVVTCPAYFNDAQR",
"VVDLLEHVAK", "VVDLLEHVAK", "VVDLLEHVAK", "VVNDTFGIEEGLMTTVHSITATQK",
"VVQTDETAR", "WAGNANELNAGYAADGYAR", "WVVIGDENFGEGSSR", "YGGPPPGWEGPHPQR",
"YGIEPTMVVQGVK", "YGQSAGNVGDEGGVAPDIK", "YHIEEEGSSK", "YHIEEEGSSK",
"YKGEVTASGDDLVIDGHK", "YLDQVLDHQR", "YQALSDPSQLESEPELFIR", "YQCVVLTEMK",
"YVDEQVAAAEADAPPEAK", "YVECSALTQR", "YVHGGNVLIDPTAK", "YAATPANPAK",
"YAATPANPAK", "AAADYAPNAAVCIISNPVNSTVPIVAEVFK", "AAADYAPNAAVCIISNPVNSTVPIVAEVFK",
"AAIRDPNPVVFLENEIAYGETFK", "AAVEEGILPGGGTALIK"), Protein.Accessions = c("HS_A0FGR8",
"HS_A0AVT1", "HS_A0AVT1", "CA_Q9URB4", "CA_Q9UVL1", "CA_Q5A0M4",
"CA_Q5A0M4", "CA_Q5A397", "CA_Q5AG68", "CA_Q5AG68", "CA_Q5AIA6",
"CA_P83777", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"CA_Q9URB4", "CA_Q9Y7F0", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"CA_Q5A017", "CA_Q5AGX8", "CA_Q5A017", "CA_P30575", "CA_P30575",
"CA_P30575", "CA_P30575", "CA_O42766", "CA_Q5A860", "CA_Q5A860",
"HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "CA_Q5A4I4", "CA_Q5AMP4",
"CA_Q59TU0", "CA_P83775", "CA_Q9URB4", "CA_Q9URB4", "CA_Q5ANH5",
"CA_Q59X49", "CA_Q59X49", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"CA_Q5AL30", "HS_A0AV96", "HS_A0AV96", "HS_A0AV96", "HS_A0AV96",
"HS_A0AV96", "CA_Q59Q76", "CA_Q59Q76", "HS_A0AVT1", "HS_A0AVT1",
"CA_Q59N01", "CA_Q5AGD1", "CA_Q59S96", "CA_Q5ALV5", "CA_Q5ALV5",
"CA_Q5AF03", "HS_A0AVT1", "CA_Q96VB9", "CA_Q96VB9", "HS_A0AV96",
"HS_A0AV96", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"CA_Q9P940", "CA_Q59QN7", "HS_A0FGR8", "HS_A0AVT1", "CA_Q5A4I4",
"CA_P46273", "CA_Q5AMP4", "HS_A0AVT1", "HS_A0FGR8", "CA_Q5AMP4",
"CA_Q5AMP4", "CA_Q5A6R1", "CA_P83777", "CA_Q59P14", "CA_Q5ADM7",
"CA_Q5ADM7", "CA_Q5A786", "CA_Q5AKV3", "CA_P31353", "CA_Q5AIA6",
"CA_P87206", "CA_Q59UR7", "HS_A0AVT1", "CA_P30575", "CA_P30575",
"CA_P46614", "CA_Q59LS1", "CA_Q59UR7", "CA_Q59UR7", "CA_Q59MR4",
"CA_Q59MR4", "CA_P28870", "HS_A0FGR8", "HS_A0FGR8", "CA_Q5A4I4",
"CA_P30575", "CA_P46598", "CA_P46598", "CA_P22011", "CA_P22011",
"CA_P22011", "CA_P83781", "CA_P83781", "CA_P30575", "CA_P30575",
"CA_P30575", "CA_P30575", "CA_P30575", "CA_P22011", "HS_A0FGR8",
"HS_A0AVT1", "CA_O42817", "CA_P83784", "HS_A0AV96", "HS_A0AVT1",
"CA_P30575", "CA_P83774", "CA_P83774", "HS_A0FGR8", "HS_A0FGR8",
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "CA_O94083",
"CA_P22011", "CA_Q9P940", "CA_Q5AF03", "CA_Q5A0M4", "CA_Q5ANP2",
"CA_Q5ALM6", "CA_P22011", "CA_O94083", "CA_Q5A652", "HS_A0AVT1",
"HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "CA_P46614",
"HS_A0AV96", "CA_Q59RQ6", "CA_Q5A786", "HS_A0FGR8", "HS_A0FGR8",
"HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "CA_P30575",
"CA_P46598", "CA_Q5ADM7", "HS_A0AV96", "CA_P82611", "CA_Q5A3Z7",
"CA_P83775", "CA_Q59RJ3", "CA_P46273", "CA_P83774", "CA_Q5ADQ6",
"CA_P25997", "CA_P83774", "CA_P83774", "CA_Q5A8X6", "CA_Q5A860",
"CA_Q5A860", "CA_P46273", "HS_A0A0B4J2F0", "HS_A0A0B4J2F0", "HS_A0A0B4J2F0",
"HS_A0AVT1", "CA_Q5AK04", "HS_A0AVT1", "CA_Q59UR7", "HS_A0FGR8",
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"HS_A0FGR8", "CA_Q5A397", "CA_Q59ZX4", "HS_A0AVT1", "CA_O42766",
"CA_P83779", "CA_P83779", "HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1",
"CA_Q5A795", "CA_Q5AMI6", "HS_A0FGR8", "CA_Q5AFQ0", "CA_Q5ADQ6",
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "CA_P83779", "CA_P83779",
"CA_Q5A2U8", "CA_Q59W54", "CA_Q5A5F2", "HS_A0AV96", "HS_A0AV96",
"HS_A0AV96", "HS_A0AV96", "HS_A0AV96", "CA_P30575", "CA_P53698",
"CA_P53698", "CA_Q59ZX4", "CA_Q59ZX4", "HS_A0FGR8", "CA_P83775",
"CA_P83779", "CA_P30575", "CA_Q59UR7", "CA_Q59RD8", "HS_A0FGR8",
"CA_Q5AAL2", "CA_Q59SM9", "CA_Q5A0M4", "HS_A0AVT1", "CA_Q5ANP2",
"CA_Q5AF03", "CA_Q5ADM7", "CA_Q5ADM7", "CA_Q5ADM7", "CA_Q59W67",
"CA_Q59YJ9", "CA_P46598", "CA_Q9URB4", "HS_A0AV96", "HS_A0AV96",
"CA_P83775", "CA_Q5A786", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8",
"CA_Q59LQ6", "CA_Q59LQ6", "CA_Q5A7T3", "CA_Q5A5A0", "HS_A0AVT1",
"HS_A0AVT1", "HS_A0AVT1", "CA_P46273", "HS_A0FGR8", "CA_Q5AND4",
"CA_Q5A389", "CA_Q59W54", "CA_O94083", "CA_Q5ADM7", "CA_Q5ADM7",
"CA_Q5A5P4", "CA_Q9URB4", "CA_Q9URB4", "CA_P83784", "CA_Q5ADM7",
"CA_Q5ADM7", "CA_Q5ADM7", "CA_Q5ADM7", "HS_A0AVT1", "CA_P83779",
"CA_P82611", "HS_A0AV96", "HS_A0AVT1", "CA_P30575", "CA_P46273",
"CA_P46273", "CA_Q5ADM7", "CA_Q59TE0", "CA_P46598", "HS_A0AVT1",
"CA_Q5A0Z9", "CA_P0CY33", "CA_O93827", "CA_Q59TE0", "CA_Q59TE0",
"CA_Q5AMP4", "CA_Q5AMP4", "CA_Q5A5V6", "CA_O74261"), PSMPerPep = c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 1L,
1L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 5L, 5L,
5L, 5L, 5L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 2L, 2L,
4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L)), .Names = c("Annotated.Sequence",
"Protein.Accessions", "PSMPerPep"), row.names = c(NA, -300L), class = "data.frame")
答案 0 :(得分:0)
先不说 dput() 输出行过长的问题(今后请像 @nrussell 那样更合适地排版),ddply 并不保证按原来的行顺序返回结果,即:
# Show the first rows of xx as supplied, before any ddply() call is applied.
head(xx)
## Annotated.Sequence Protein.Accessions PSMPerPep
## 1 ACDLPAWVHFPDTER HS_A0FGR8 1
## 2 ACIGDTLCQK HS_A0AVT1 2
## 3 ACIGDTLCQK HS_A0AVT1 2
## 4 ADEEFFAK CA_Q9URB4 1
## 5 AENPGISFGQVGK CA_Q9UVL1 1
## 6 AETLYEGPSDDPFCTAIR CA_Q5A0M4 2
# Count rows per peptide. Compare the head() output below with head(xx):
# the rows come back in a different order than they had in xx.
yy <- plyr::ddply(
  .data = xx,
  .variables = "Annotated.Sequence",
  .fun = transform,
  PSMPerPep = length(Annotated.Sequence)
)
# Keep a copy of this intermediate result for the comparisons further down.
y1 <- yy
head(yy)
## Annotated.Sequence Protein.Accessions PSMPerPep
## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2
## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2
## 3 AAIRDPNPVVFLENEIAYGETFK CA_Q5A5V6 1
## 4 AAVEEGILPGGGTALIK CA_O74261 1
## 5 ACDLPAWVHFPDTER HS_A0FGR8 1
## 6 ACIGDTLCQK HS_A0AVT1 2
# Count rows per protein on top of the previous result; the rows are
# reordered again, this time following Protein.Accessions.
yy <- plyr::ddply(
  .data = yy,
  .variables = "Protein.Accessions",
  .fun = transform,
  PSMPerProt = length(Protein.Accessions)
)
head(yy)
## Annotated.Sequence Protein.Accessions PSMPerPep PSMPerProt
## 1 AVASSGQELSVEER CA_O42766 1 2
## 2 QAFDDAVADLETLSEDSYK CA_O42766 1 2
## 3 IIAAVPNASDVAVCSSR CA_O42817 1 1
## 4 AAVEEGILPGGGTALIK CA_O74261 1 1
## 5 YVHGGNVLIDPTAK CA_O93827 1 1
## 6 IVDMSTSK CA_O94083 1 3
# Sort each result by Annotated.Sequence so the three can be compared row by
# row: in the rows shown, the PSMPerPep values agree in all of them.
# Result after the first ddply() call only:
head(dplyr::arrange(y1, Annotated.Sequence))
## Annotated.Sequence Protein.Accessions PSMPerPep
## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2
## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2
## 3 AAIRDPNPVVFLENEIAYGETFK CA_Q5A5V6 1
## 4 AAVEEGILPGGGTALIK CA_O74261 1
## 5 ACDLPAWVHFPDTER HS_A0FGR8 1
## 6 ACIGDTLCQK HS_A0AVT1 2
# Result after both ddply() calls (PSMPerProt added):
head(dplyr::arrange(yy, Annotated.Sequence))
## Annotated.Sequence Protein.Accessions PSMPerPep PSMPerProt
## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2 6
## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2 6
## 3 AAIRDPNPVVFLENEIAYGETFK CA_Q5A5V6 1 1
## 4 AAVEEGILPGGGTALIK CA_O74261 1 1
## 5 ACDLPAWVHFPDTER HS_A0FGR8 1 50
## 6 ACIGDTLCQK HS_A0AVT1 2 35
# The input data xx, which already carries a PSMPerPep column:
head(dplyr::arrange(xx, Annotated.Sequence))
## Annotated.Sequence Protein.Accessions PSMPerPep
## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2
## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK CA_Q5AMP4 2
## 3 AAIRDPNPVVFLENEIAYGETFK CA_Q5A5V6 1
## 4 AAVEEGILPGGGTALIK CA_O74261 1
## 5 ACDLPAWVHFPDTER HS_A0FGR8 1
## 6 ACIGDTLCQK HS_A0AVT1 2
您可以改用 dplyr 并执行以下操作:
library(dplyr)

# Grouped mutate() adds each count to the existing rows; the printed result
# below starts with the same rows, in the same order, as head(xx).
xx %>%
  # rows per peptide
  group_by(Annotated.Sequence) %>%
  mutate(PSMPerPep = n()) %>%
  # rows per protein; this group_by() replaces the peptide grouping
  group_by(Protein.Accessions) %>%
  mutate(PSMPerProt = n()) %>%
  # drop the grouping so later verbs act on the whole table
  ungroup()
## # A tibble: 300 × 4
## Annotated.Sequence Protein.Accessions PSMPerPep PSMPerProt
## <chr> <chr> <int> <int>
## 1 ACDLPAWVHFPDTER HS_A0FGR8 1 50
## 2 ACIGDTLCQK HS_A0AVT1 2 35
## 3 ACIGDTLCQK HS_A0AVT1 2 35
## 4 ADEEFFAK CA_Q9URB4 1 7
## 5 AENPGISFGQVGK CA_Q9UVL1 1 1
## 6 AETLYEGPSDDPFCTAIR CA_Q5A0M4 2 4
## 7 AETLYEGPSDDPFCTAIR CA_Q5A0M4 2 4
## 8 AFDDESVQK CA_Q5A397 1 2
## 9 AILGATNPLQSAPGTIR CA_Q5AG68 2 2
## 10 AILGATNPLQSAPGTIR CA_Q5AG68 2 2
## # ... with 290 more rows
这样也会保留原始 xx 中 Annotated.Sequence 的顺序。