ddply的第二次迭代改变了第一次迭代的结果

时间:2016-10-07 12:01:34

标签: r plyr

我正在尝试运行ddply来计算第1列和第2列中每个元素的出现次数。所以我使用ddply两次:

1)当我第一次运行操作时,它会创建我期望的值

# Tag every row with the number of rows that share its Annotated.Sequence
# value. The result is assigned back to xx.
xx <- ddply(
  .data = xx,
  .variables = "Annotated.Sequence",
  .fun = transform,
  PSMPerPep = length(Annotated.Sequence)
)

2)然而,当我在另一列上第二次运行相同的操作时,不仅新建的列中数字是错误的,第一次创建的那一列中的数字也被改变了。有什么建议?

# Tag every row with the number of rows that share its Protein.Accessions
# value. The result is assigned back to xx.
xx <- ddply(
  .data = xx,
  .variables = "Protein.Accessions",
  .fun = transform,
  PSMPerProt = length(Protein.Accessions)
)
xx <- structure(list(Annotated.Sequence = c("ACDLPAWVHFPDTER", "ACIGDTLCQK", 
"ACIGDTLCQK", "ADEEFFAK", "AENPGISFGQVGK", "AETLYEGPSDDPFCTAIR", 
"AETLYEGPSDDPFCTAIR", "AFDDESVQK", "AILGATNPLQSAPGTIR", "AILGATNPLQSAPGTIR", 
"AIVNATTHYNDPVK", "AKGDNDPLDVCEIGEK", "ALALLEDEER", "ALALLEDEER", 
"ALALLEDEER", "ALALLEDEER", "APVGNPEGADKPNKK", "APVVQQPAPSFK", 
"AQPPEAGPQGLHDLGR", "AQPPEAGPQGLHDLGR", "AQPPEAGPQGLHDLGR", "ASTGEEYTAETDPGVQSVK", 
"ASTSTSAPASTPSPSSK", "ATGTTIVTDTGEFEQIAK", "AVANVNDIIAPALIK", 
"AVANVNDIIAPALIK", "AVANVNDIIAPALIK", "AVANVNDIIAPALIK", "AVASSGQELSVEER", 
"AYLAENDPDSVEAFEK", "AYLAENDPDSVEAFEK", "CLANLRPLLDSGTMGTK", 
"CLANLRPLLDSGTMGTK", "DDDHNGHIDFITAASNLR", "DDGYGGGYGGGRPDDR", 
"DDLFNTNASIVR", "DFNTGSANAAADAGGEDIPDLVDQKFDDVE", "DFPQLDSQLPK", 
"DLFDYAQEK", "DLFDYAQEK", "DLQELIAEGNTK", "DNDSYGSSNRR", "DNDSYGSSNRR", 
"DQANDGLSSALLILYLDSAR", "DQANDGLSSALLILYLDSAR", "DQANDGLSSALLILYLDSAR", 
"DVGADGQPTEELK", "DVYEDELVPVFEAVGR", "DVYEDELVPVFEAVGR", "DVYEDELVPVFEAVGR", 
"DVYEDELVPVFEAVGR", "DVYEDELVPVFEAVGR", "EADTNNDGEIDIQEFTSLLAAK", 
"EADTNNDGEIDIQEFTSLLAAK", "EDFTLLDFINAVK", "EDFTLLDFINAVK", "EEEQEEEEDAEK", 
"EFILEQHNK", "EGETETEGAATATAAATEAK", "EGTIPTDYEQATGLQR", "EGTIPTDYEQATGLQR", 
"EIFDNVNSEFNVALK", "EIFISNITQANPGIVTCLENHPHK", "ELIEPAEGEEVDEDAEPQYR", 
"ELIEPAEGEEVDEDAEPQYR", "ELNNYEIRPGR", "ELNNYEIRPGR", "EPTPSIASDISLPIATQELR", 
"EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR", 
"EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR", "EPTPSIASDISLPIATQELR", 
"ERLLDEWFTLDEVPK", "ERPPDHQHSAQVK", "ESDEFIAEK", "ESNPADGRENLQSIEDR", 
"ETIEPAVR", "FDLNEPLHLSFLQNAAK", "FGGGRPDDR", "FRQELTSLADVYINDAFGTAHR", 
"FVSEVAGTNPVNENVPVVGGHSGVTIVPLLSQTK", "FWQTYSSAEEVLQK", "GANTHLSTFSFTK", 
"GAPGVAADVSHVPTNSTVK", "GAPGVAADVSHVPTNSTVK", "GATYGKPTNQGVNQLK", 
"GDNDPLDVCEIGEK", "GEGEGGELPGVTPYPNENELIK", "GEVTASGDDLVIDGHK", 
"GEVTASGDDLVIDGHK", "GFDSAEGLQTSGLHVQGQK", "GGDVSSTTYDA", "GGNDYEIYNDPR", 
"GGVIMDVVNADQAK", "GIFGYGYETPSAIQQR", "GISELGIYPAVDPLDSK", "GMITVTDPDLIEK", 
"GNPTVEVDFTTDK", "GNPTVEVDFTTDK", "GSGHSNTVR", "GTDEANGATEFDR", 
"GTEVNDTGAPISVPVGR", "GTEVNDTGAPISVPVGR", "GTQFGLQTPGSR", "GTQFGLQTPGSR", 
"GWDISLTNNYGK", "GWTQWYDLTEDGTRPQAMT", "GWTQWYDLTEDGTRPQAMT", 
"HEGGFGGGRPDDR", "HIANISNAK", "HNDDEQYVWESNAGGK", "HSEFVAYPIQLVVTK", 
"HVVFGEVTDGLDIVK", "HVVFGEVTDGLDIVK", "HVVFGEVTDGLDIVK", "IADSGLTALSYTQELRPGVK", 
"IADSGLTALSYTQELRPGVK", "IDEFLLSLDGTPNK", "IDEFLLSLDGTPNK", "IEEELGSEAIYAGK", 
"IEEELGSEAIYAGK", "IEEELGSEAIYAGK", "IESFGSGSGATSK", "IHFIEAQDLQGK", 
"IIPAIATTTATVSGLVALEMIK", "IIAAVPNASDVAVCSSR", "ILENSEGGR", "IPRDVYEDELVPVFEAVGR", 
"IQEFKPSNK", "IQIVGDDLTVTNPTR", "ISPSDQSSTVISASWDK", "ISPSDQSSTVISASWDK", 
"ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", 
"ISSNPNPVVQMSVGHK", "ISSNPNPVVQMSVGHK", "IVDMSTSK", "IVIEESGQL", 
"IVSDWSNIVVAYEPVWAIGTGLAATPEDAEETHK", "IVTGVNPQSAVK", "IWCFGPDGNGPNLVVDQTK", 
"KAEDEEEDEGEIDETGLDPK", "KIESCGTSSGTPSASVVIEESGEAEK", "KIESFGSGSGATSK", 
"KLEDLSPSTHNMEVPNVSR", "KLQINLVVEDALVSLDDLQAAVEEDEDHVQSTDIAAMQK", 
"KPNVGCQQDSEELLK", "KPNVGCQQDSEELLK", "KPNVGCQQDSEELLK", "KPNVGCQQDSEELLK", 
"KPNVGCQQDSEELLK", "KPTATTETCAVAAVSAAYEQDAK", "KREEILEEIAK", 
"KYDVVVIGGGPGGYVAAIK", "LADYLINVGY", "LEWLTLMPNASNLDK", "LEWLTLMPNASNLDK", 
"LFCDFGDEFEVLDTTGEEPK", "LFCDFGDEFEVLDTTGEEPK", "LFCDFGDEFEVLDTTGEEPK", 
"LFCDFGDEFEVLDTTGEEPK", "LGANAILGVSLAAANAAAAAQGIPLYK", "LGIHEDAQNR", 
"LISWYDNEYGYSTR", "LLGVCCSVDNCR", "LLYGHLDDPHNQEIER", "LLYNDYVSNPSK", 
"LQSENFTYEIVK", "LRDQAINNAQR", "LSHVSTGGGASLELLEGK", "LTGGEDNQYGIPK", 
"LVEALCNEPEEK", "LVEDPQIVAPFMDK", "LWDLETGETTQR", "LWDLETGETTQR", 
"MGHAGAIVAGGK", "MLIFEDVISGDELLSDAYDVK", "MLIFEDVISGDELLSDAYDVK", 
"MPIGDSLFDEAGAK", "MQLVQESEEK", "MQLVQESEEK", "MQLVQESEEK", "NCFLNLAIPIVVFTETTEVRKTK", 
"NDREFNGIIAQTTNDNITEAGK", "NFALLGVGTSK", "NGDQDLVLEVAQHLGENTVR", 
"NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", 
"NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NLIAFSEDGSDPYVR", "NQAALNPK", 
"NVPGVETASVK", "NWSQCVELAR", "QAFDDAVADLETLSEDSYK", "QATFPGVQMK", 
"QATFPGVQMK", "QDVIITALDNVEAR", "QDVIITALDNVEAR", "QDVIITALDNVEAR", 
"QEEEEEEK", "QGDNEIEGLTDTTVPK", "QLENGTTLGQSPLGQIQLTIR", "RAGELTQEELER", 
"REAQLCVLCDSVTEESIIK", "RERPPDHQHSAQVK", "RERPPDHQHSAQVK", "RERPPDHQHSAQVK", 
"RISTVGELNDLFADK", "RISTVGELNDLFADK", "RQENLAK", "RQGTSPDTMR", 
"SEPLPTEEEK", "SFGQFNPGCVER", "SFGQFNPGCVER", "SFGQFNPGCVER", 
"SFGQFNPGCVER", "SFGQFNPGCVER", "SGETEDTFIADLSVGLR", "SGLAEGYSYTDANK", 
"SGLAEGYSYTDANK", "SGQAAFGNMCR", "SGYTLPSNIISNTDVTR", "SHMSGSPGPGGSNTAPSTPVIGGSDKPGMEEK", 
"SIDDSVAQIIG", "SINPNYTPVPVPETK", "SIVPSGASTGVHEALELR", "SLQDIIAILGMDELSEADK", 
"SNETGILDAIK", "SSSSLLASPGHISVK", "STGDDNEVAEEEEADVEFTPVVQLDK", 
"STAAEELANTFGYK", "SVEQIDDCPAGNIIGLVGIDQFLLK", "SYTAADATLK", 
"SAAGTYVVFGEAK", "TAEDVIAAFECN", "TASGNIIPSSTGAAK", "TASGNIIPSSTGAAK", 
"TASGNIIPSSTGAAK", "TCNVLVAIEQQSPDIAQGLHYEK", "TCYNCGK", "TGQFGWSANMER", 
"TGTPLFSSHMLDLSEETDDENIATCAK", "TGYSMVQENGQR", "TGYSMVQENGQR", 
"THGPQIK", "TKQTILIAHYPSGVQPGEATTLVEK", "TLNPVFDQSFDFSVSLPEVQR", 
"TLNPVFDQSFDFSVSLPEVQRR", "TLNPVFDQSFDFSVSLPEVQRR", "TLTTVQGVPNEYDLK", 
"TLTTVQGVPNEYDLKK", "TVEDDHPIPEDVHENYQNTVAEFASR", "VATLYDMIDHQDATNLDDK", 
"VCPTTETIYNDEFYTK", "VCPTTETIYNDEFYTK", "VCPTTETIYNDEFYTK", "VDFNVPLDGK", 
"VDVGQQPLR", "VEEPLGSYAPNTIDKPFYER", "VEQEAEQQIHK", "VHADQTPEDLDMDDGDTIEAHR", 
"VHLVAIDIFTGK", "VIITAPSADAPMFVVGVNEDK", "VIITAPSADAPMFVVGVNEDK", 
"VITSSAR", "VNLDTDCQYAYLTGIR", "VNLDTDCQYAYLTGIRDYVTNK", "VNSAVVTCPAYFNDAQR", 
"VVDLLEHVAK", "VVDLLEHVAK", "VVDLLEHVAK", "VVNDTFGIEEGLMTTVHSITATQK", 
"VVQTDETAR", "WAGNANELNAGYAADGYAR", "WVVIGDENFGEGSSR", "YGGPPPGWEGPHPQR", 
"YGIEPTMVVQGVK", "YGQSAGNVGDEGGVAPDIK", "YHIEEEGSSK", "YHIEEEGSSK", 
"YKGEVTASGDDLVIDGHK", "YLDQVLDHQR", "YQALSDPSQLESEPELFIR", "YQCVVLTEMK", 
"YVDEQVAAAEADAPPEAK", "YVECSALTQR", "YVHGGNVLIDPTAK", "YAATPANPAK", 
"YAATPANPAK", "AAADYAPNAAVCIISNPVNSTVPIVAEVFK", "AAADYAPNAAVCIISNPVNSTVPIVAEVFK", 
"AAIRDPNPVVFLENEIAYGETFK", "AAVEEGILPGGGTALIK"), Protein.Accessions = c("HS_A0FGR8", 
"HS_A0AVT1", "HS_A0AVT1", "CA_Q9URB4", "CA_Q9UVL1", "CA_Q5A0M4", 
"CA_Q5A0M4", "CA_Q5A397", "CA_Q5AG68", "CA_Q5AG68", "CA_Q5AIA6", 
"CA_P83777", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"CA_Q9URB4", "CA_Q9Y7F0", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"CA_Q5A017", "CA_Q5AGX8", "CA_Q5A017", "CA_P30575", "CA_P30575", 
"CA_P30575", "CA_P30575", "CA_O42766", "CA_Q5A860", "CA_Q5A860", 
"HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "CA_Q5A4I4", "CA_Q5AMP4", 
"CA_Q59TU0", "CA_P83775", "CA_Q9URB4", "CA_Q9URB4", "CA_Q5ANH5", 
"CA_Q59X49", "CA_Q59X49", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"CA_Q5AL30", "HS_A0AV96", "HS_A0AV96", "HS_A0AV96", "HS_A0AV96", 
"HS_A0AV96", "CA_Q59Q76", "CA_Q59Q76", "HS_A0AVT1", "HS_A0AVT1", 
"CA_Q59N01", "CA_Q5AGD1", "CA_Q59S96", "CA_Q5ALV5", "CA_Q5ALV5", 
"CA_Q5AF03", "HS_A0AVT1", "CA_Q96VB9", "CA_Q96VB9", "HS_A0AV96", 
"HS_A0AV96", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"CA_Q9P940", "CA_Q59QN7", "HS_A0FGR8", "HS_A0AVT1", "CA_Q5A4I4", 
"CA_P46273", "CA_Q5AMP4", "HS_A0AVT1", "HS_A0FGR8", "CA_Q5AMP4", 
"CA_Q5AMP4", "CA_Q5A6R1", "CA_P83777", "CA_Q59P14", "CA_Q5ADM7", 
"CA_Q5ADM7", "CA_Q5A786", "CA_Q5AKV3", "CA_P31353", "CA_Q5AIA6", 
"CA_P87206", "CA_Q59UR7", "HS_A0AVT1", "CA_P30575", "CA_P30575", 
"CA_P46614", "CA_Q59LS1", "CA_Q59UR7", "CA_Q59UR7", "CA_Q59MR4", 
"CA_Q59MR4", "CA_P28870", "HS_A0FGR8", "HS_A0FGR8", "CA_Q5A4I4", 
"CA_P30575", "CA_P46598", "CA_P46598", "CA_P22011", "CA_P22011", 
"CA_P22011", "CA_P83781", "CA_P83781", "CA_P30575", "CA_P30575", 
"CA_P30575", "CA_P30575", "CA_P30575", "CA_P22011", "HS_A0FGR8", 
"HS_A0AVT1", "CA_O42817", "CA_P83784", "HS_A0AV96", "HS_A0AVT1", 
"CA_P30575", "CA_P83774", "CA_P83774", "HS_A0FGR8", "HS_A0FGR8", 
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "CA_O94083", 
"CA_P22011", "CA_Q9P940", "CA_Q5AF03", "CA_Q5A0M4", "CA_Q5ANP2", 
"CA_Q5ALM6", "CA_P22011", "CA_O94083", "CA_Q5A652", "HS_A0AVT1", 
"HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "CA_P46614", 
"HS_A0AV96", "CA_Q59RQ6", "CA_Q5A786", "HS_A0FGR8", "HS_A0FGR8", 
"HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", "CA_P30575", 
"CA_P46598", "CA_Q5ADM7", "HS_A0AV96", "CA_P82611", "CA_Q5A3Z7", 
"CA_P83775", "CA_Q59RJ3", "CA_P46273", "CA_P83774", "CA_Q5ADQ6", 
"CA_P25997", "CA_P83774", "CA_P83774", "CA_Q5A8X6", "CA_Q5A860", 
"CA_Q5A860", "CA_P46273", "HS_A0A0B4J2F0", "HS_A0A0B4J2F0", "HS_A0A0B4J2F0", 
"HS_A0AVT1", "CA_Q5AK04", "HS_A0AVT1", "CA_Q59UR7", "HS_A0FGR8", 
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"HS_A0FGR8", "CA_Q5A397", "CA_Q59ZX4", "HS_A0AVT1", "CA_O42766", 
"CA_P83779", "CA_P83779", "HS_A0AVT1", "HS_A0AVT1", "HS_A0AVT1", 
"CA_Q5A795", "CA_Q5AMI6", "HS_A0FGR8", "CA_Q5AFQ0", "CA_Q5ADQ6", 
"HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", "CA_P83779", "CA_P83779", 
"CA_Q5A2U8", "CA_Q59W54", "CA_Q5A5F2", "HS_A0AV96", "HS_A0AV96", 
"HS_A0AV96", "HS_A0AV96", "HS_A0AV96", "CA_P30575", "CA_P53698", 
"CA_P53698", "CA_Q59ZX4", "CA_Q59ZX4", "HS_A0FGR8", "CA_P83775", 
"CA_P83779", "CA_P30575", "CA_Q59UR7", "CA_Q59RD8", "HS_A0FGR8", 
"CA_Q5AAL2", "CA_Q59SM9", "CA_Q5A0M4", "HS_A0AVT1", "CA_Q5ANP2", 
"CA_Q5AF03", "CA_Q5ADM7", "CA_Q5ADM7", "CA_Q5ADM7", "CA_Q59W67", 
"CA_Q59YJ9", "CA_P46598", "CA_Q9URB4", "HS_A0AV96", "HS_A0AV96", 
"CA_P83775", "CA_Q5A786", "HS_A0FGR8", "HS_A0FGR8", "HS_A0FGR8", 
"CA_Q59LQ6", "CA_Q59LQ6", "CA_Q5A7T3", "CA_Q5A5A0", "HS_A0AVT1", 
"HS_A0AVT1", "HS_A0AVT1", "CA_P46273", "HS_A0FGR8", "CA_Q5AND4", 
"CA_Q5A389", "CA_Q59W54", "CA_O94083", "CA_Q5ADM7", "CA_Q5ADM7", 
"CA_Q5A5P4", "CA_Q9URB4", "CA_Q9URB4", "CA_P83784", "CA_Q5ADM7", 
"CA_Q5ADM7", "CA_Q5ADM7", "CA_Q5ADM7", "HS_A0AVT1", "CA_P83779", 
"CA_P82611", "HS_A0AV96", "HS_A0AVT1", "CA_P30575", "CA_P46273", 
"CA_P46273", "CA_Q5ADM7", "CA_Q59TE0", "CA_P46598", "HS_A0AVT1", 
"CA_Q5A0Z9", "CA_P0CY33", "CA_O93827", "CA_Q59TE0", "CA_Q59TE0", 
"CA_Q5AMP4", "CA_Q5AMP4", "CA_Q5A5V6", "CA_O74261"), PSMPerPep = c(1L, 
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 
1L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 5L, 5L, 
5L, 5L, 5L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 
2L, 2L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 2L, 2L, 
4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 
1L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 
1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 
1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L)), .Names = c("Annotated.Sequence", 
"Protein.Accessions", "PSMPerPep"), row.names = c(NA, -300L), class = "data.frame")

1 个答案:

答案 0 :(得分:0)

  1. 一些主流的R IDE在处理像您贴出的dput()输出那样的超长行时会出问题(以后请像@nrussell那样把它整理成更合适的格式)
  2. 您查看的是修改后的那一列还是整行?因为ddply并不保证按原来的行顺序返回结果
  3. 即:

    # Look at the first rows of the posted data; the recorded output shows it
    # already carries a PSMPerPep column.
    head(xx)
    ##   Annotated.Sequence Protein.Accessions PSMPerPep
    ## 1    ACDLPAWVHFPDTER          HS_A0FGR8         1
    ## 2         ACIGDTLCQK          HS_A0AVT1         2
    ## 3         ACIGDTLCQK          HS_A0AVT1         2
    ## 4           ADEEFFAK          CA_Q9URB4         1
    ## 5      AENPGISFGQVGK          CA_Q9UVL1         1
    ## 6 AETLYEGPSDDPFCTAIR          CA_Q5A0M4         2
    
    # First pass: (re)compute the per-Annotated.Sequence row count into PSMPerPep.
    yy <- plyr::ddply(xx, "Annotated.Sequence", transform,
                PSMPerPep = length(Annotated.Sequence))
    # Keep a copy of the first-pass result so it can be compared after the
    # second pass overwrites yy.
    y1 <- yy
    
    # The rows shown below are in a different order than in head(xx) above.
    head(yy)
    ##               Annotated.Sequence Protein.Accessions PSMPerPep
    ## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2
    ## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2
    ## 3        AAIRDPNPVVFLENEIAYGETFK          CA_Q5A5V6         1
    ## 4              AAVEEGILPGGGTALIK          CA_O74261         1
    ## 5                ACDLPAWVHFPDTER          HS_A0FGR8         1
    ## 6                     ACIGDTLCQK          HS_A0AVT1         2
    # Second pass: add the per-Protein.Accessions row count as PSMPerProt.
    yy <- plyr::ddply(yy, "Protein.Accessions", transform,
                PSMPerProt = length(Protein.Accessions))
    
    # The rows shown below appear ordered by Protein.Accessions, i.e. the row
    # order changed again relative to the first pass.
    head(yy)
    ##    Annotated.Sequence Protein.Accessions PSMPerPep PSMPerProt
    ## 1      AVASSGQELSVEER          CA_O42766         1          2
    ## 2 QAFDDAVADLETLSEDSYK          CA_O42766         1          2
    ## 3   IIAAVPNASDVAVCSSR          CA_O42817         1          1
    ## 4   AAVEEGILPGGGTALIK          CA_O74261         1          1
    ## 5      YVHGGNVLIDPTAK          CA_O93827         1          1
    ## 6            IVDMSTSK          CA_O94083         1          3
    
    # Sort each data frame by Annotated.Sequence so the same rows line up.
    # First-pass snapshot:
    head(dplyr::arrange(y1, Annotated.Sequence))
    ##               Annotated.Sequence Protein.Accessions PSMPerPep
    ## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2
    ## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2
    ## 3        AAIRDPNPVVFLENEIAYGETFK          CA_Q5A5V6         1
    ## 4              AAVEEGILPGGGTALIK          CA_O74261         1
    ## 5                ACDLPAWVHFPDTER          HS_A0FGR8         1
    ## 6                     ACIGDTLCQK          HS_A0AVT1         2
    
    # Result after the second pass: in the rows shown, PSMPerPep is the same as
    # in the first-pass snapshot above.
    head(dplyr::arrange(yy, Annotated.Sequence))
    ##               Annotated.Sequence Protein.Accessions PSMPerPep PSMPerProt
    ## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2          6
    ## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2          6
    ## 3        AAIRDPNPVVFLENEIAYGETFK          CA_Q5A5V6         1          1
    ## 4              AAVEEGILPGGGTALIK          CA_O74261         1          1
    ## 5                ACDLPAWVHFPDTER          HS_A0FGR8         1         50
    ## 6                     ACIGDTLCQK          HS_A0AVT1         2         35
    
    # Original input sorted the same way, for reference.
    head(dplyr::arrange(xx, Annotated.Sequence))
    ##               Annotated.Sequence Protein.Accessions PSMPerPep
    ## 1 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2
    ## 2 AAADYAPNAAVCIISNPVNSTVPIVAEVFK          CA_Q5AMP4         2
    ## 3        AAIRDPNPVVFLENEIAYGETFK          CA_Q5A5V6         1
    ## 4              AAVEEGILPGGGTALIK          CA_O74261         1
    ## 5                ACDLPAWVHFPDTER          HS_A0FGR8         1
    ## 6                     ACIGDTLCQK          HS_A0AVT1         2
    

    您可以改用dplyr并执行以下操作:

    # Attach dplyr, which the pipeline below relies on.
    library(dplyr)
    
    # Count rows per Annotated.Sequence into PSMPerPep, then regroup by
    # Protein.Accessions and count rows into PSMPerProt; ungroup() removes the
    # grouping from the returned tibble. The result is printed, not assigned.
    # In the rows printed below, the order matches the original xx.
    xx %>%
      group_by(Annotated.Sequence) %>%
      mutate(PSMPerPep = n()) %>%
      group_by(Protein.Accessions) %>%
      mutate(PSMPerProt = n()) %>%
      ungroup()
    ## # A tibble: 300 × 4
    ##    Annotated.Sequence Protein.Accessions PSMPerPep PSMPerProt
    ##                 <chr>              <chr>     <int>      <int>
    ## 1     ACDLPAWVHFPDTER          HS_A0FGR8         1         50
    ## 2          ACIGDTLCQK          HS_A0AVT1         2         35
    ## 3          ACIGDTLCQK          HS_A0AVT1         2         35
    ## 4            ADEEFFAK          CA_Q9URB4         1          7
    ## 5       AENPGISFGQVGK          CA_Q9UVL1         1          1
    ## 6  AETLYEGPSDDPFCTAIR          CA_Q5A0M4         2          4
    ## 7  AETLYEGPSDDPFCTAIR          CA_Q5A0M4         2          4
    ## 8           AFDDESVQK          CA_Q5A397         1          2
    ## 9   AILGATNPLQSAPGTIR          CA_Q5AG68         2          2
    ## 10  AILGATNPLQSAPGTIR          CA_Q5AG68         2          2
    ## # ... with 290 more rows
    

    这样还能保留xx原有的Annotated.Sequence行顺序。

相关问题