按数值区间对数据框取子集

时间:2015-11-13 16:42:16

标签: r subset

您好,我想对数据框(mydata)取子集,只保留满足以下条件的行:该行的 Start_Position 位于第二个数据框 Hemebed 中同一 Hugo_Symbol 的 Start_Position 和 End_Position 之间。

我正在使用下面这段代码,但它运行得非常慢……有没有更快的办法?非常感谢!

        # Keep the rows of mydata whose Start_Position lies inside at least one
        # [Start_Position, End_Position] interval that Hemebed lists for the
        # same Hugo_Symbol.
        #
        # Symbols are compared as character strings because the Hugo_Symbol
        # factors of the two data frames do not share the same level set.
        intervals_by_symbol <- split(
          Hemebed[, c("Start_Position", "End_Position")],
          as.character(Hemebed$Hugo_Symbol)
        )
        row_symbol <- as.character(mydata$Hugo_Symbol)

        in_interval <- vapply(seq_len(nrow(mydata)), function(i) {
          intervals <- intervals_by_symbol[[row_symbol[i]]]
          # A symbol with no Hemebed intervals can never match; the original
          # 1:0 loop failed on this case instead of skipping the row.
          if (is.null(intervals)) {
            return(FALSE)
          }
          pos <- mydata$Start_Position[i]
          hit <- pos >= intervals$Start_Position &
            pos <= intervals$End_Position
          # isTRUE() treats a missing position or bound as "no match".
          isTRUE(any(hit))
        }, logical(1))

        # Each matching row index appears exactly once, so a row that falls
        # inside several intervals is not duplicated in the result.
        vector_remove <- which(in_interval)
        mydata_Heme_keep <- mydata[vector_remove, ]


    # Example input (dput() output): a 20-row data.frame in which every row has
    # Chromosome level "1" and Hugo_Symbol level 46 ("CSF3R"). Start_Position
    # equals End_Position on each row and runs over the consecutive positions
    # 36946915 to 36946934, i.e. one row per position.
    mydata <- structure(list(Chromosome = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", 
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "2", 
    "20", "21", "22", "3", "4", "5", "6", "7", "8", "9", "X"), class = "factor"), 
        Start_Position = c(36946915, 36946916, 36946917, 36946918, 
        36946919, 36946920, 36946921, 36946922, 36946923, 36946924, 
        36946925, 36946926, 36946927, 36946928, 36946929, 36946930, 
        36946931, 36946932, 36946933, 36946934), End_Position = c(36946915, 
        36946916, 36946917, 36946918, 36946919, 36946920, 36946921, 
        36946922, 36946923, 36946924, 36946925, 36946926, 36946927, 
        36946928, 36946929, 36946930, 36946931, 36946932, 36946933, 
        36946934), Hugo_Symbol = structure(c(46L, 46L, 46L, 46L, 
        46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 
        46L, 46L, 46L, 46L), .Label = c("AKT3", "BCL2L11", "SH2B3", 
        "MEF2B", "PARP2", "PARP3", "RAD50", "CDK4", "CDK6", "CDK8", 
        "CDKN1A", "CDKN1B", "STAG1", "CDKN2A", "CDKN2C", "IKZF1", 
        "NOD1", "YAP1", "CEBPA", "CTCF", "GNA13", "CCT6B", "STAG2", 
        "PLK2", "FRS2", "MALT1", "PNRC1", "CHD2", "PTPRT", "FAF1", 
        "CHEK2", "U2AF2", "CHUK", "ESCO1", "CKS1B", "EXOSC6", "FAT3", 
        "LRRK2", "CPS1", "CREBBP", "AMER1", "AAMP", "PARP1", "PARP4", 
        "CSF1R", "CSF3R", "APCDD1", "CTNNA1", "CTNNB1", "CUX1", "CYLD", 
        "ESCO2", "CYP17A1", "DAXX"), class = "factor")), .Names = c("Chromosome", 
    "Start_Position", "End_Position", "Hugo_Symbol"), row.names = c(NA, 
    20L), class = "data.frame")


Hemebed <- structure(list(Chromosome = c("1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1"), Start_Position = c(36931697L, 
36931958L, 36932822L, 36933156L, 36933423L, 36933676L, 36934757L, 
36935253L, 36937034L, 36937654L, 36937839L, 36938118L, 36939036L, 
36939365L, 36940978L, 36945016L), End_Position = c(36931801L, 
36932509L, 36932921L, 36933255L, 36933563L, 36933822L, 36934858L, 
36935441L, 36937247L, 36937753L, 36937992L, 36938287L, 36939223L, 
36939488L, 36941274L, 36945115L), Hugo_Symbol = structure(c(109L, 
109L, 109L, 109L, 109L, 109L, 109L, 109L, 109L, 109L, 109L, 109L, 
109L, 109L, 109L, 109L), .Label = c("AAMP", "ABL1", "ABL2", "ACTB", 
"AKT1", "AKT2", "AKT3", "ALK", "ALOX12B", "APC", "APCDD1", "APH1A", 
"AR", "ARAF", "ARFRP1", "ARHGAP26", "ARID1A", "ARID1B", "ARID2", 
"ARID5B", "ASMTL", "ASXL1", "ASXL2", "ASXL3", "ATM", "ATR", "ATRX", 
"AURKA", "AURKB", "AXIN1", "AXIN2", "AXL", "B2M", "BACH1", "BAP1", 
"BARD1", "BBC3", "BCL10", "BCL11B", "BCL2", "BCL2L1", "BCL2L11", 
"BCL2L11_BCL2L11_BIM", "BCL2L2", "BCL6", "BCL7A", "BCOR", "BCORL1", 
"BIRC2", "BIRC3", "BLM", "BMPR1A", "BRAF", "BRCA1", "BRCA2", 
"BRD4", "BRIP1", "BRSK1", "BTG1", "BTG2", "BTK", "BTLA", "C11orf30", 
"C17orf39", "CAD", "CALR", "CARD11", "CASP8", "CBFB", "CBL", 
"CCND1", "CCND2", "CCND3", "CCNE1", "CCT6B", "CD22", "CD274", 
"CD276", "CD36", "CD58", "CD70", "CD79A", "CD79B", "CDC73", "CDH1", 
"CDK12", "CDK4", "CDK6", "CDK8", "CDKN1A", "CDKN1B", "CDKN2A", 
"CDKN2B", "CDKN2C", "CEBPA", "CHD2", "CHEK1", "CHEK2", "CHUK", 
"CIC", "CIITA", "CKS1B", "CPS1", "CRBN", "CREBBP", "CRKL", "CRLF2", 
"CSF1R", "CSF3R", "CTCF", "CTLA4", "CTNNA1", "CTNNB1", "CUL3", 
"CUL4A", "CUL4B", "CUX1", "CXCR4", "CYLD", "CYP17A1", "D2HGDH", 
"DAXX", "DCUN1D1", "DDR2", "DDX3X", "DICER1", "DIS3", "DKC1", 
"DNM2", "DNMT1", "DNMT3A", "DNMT3B", "DOT1L", "DTX1", "DUSP2", 
"DUSP9", "E2F3", "EBF1", "ECT2L", "EED", "EGFL7", "EGFR", "EIF1AX", 
"ELP2", "EP300", "EPCAM", "EPHA3", "EPHA5", "EPHA7", "EPHB1", 
"ERBB2", "ERBB3", "ERBB4", "ERCC2", "ERCC3", "ERCC4", "ERCC5", 
"ERG", "ESCO1", "ESCO2", "ESR1", "ETS1", "ETV1", "ETV6", "EWSR1", 
"EXOSC6", "EZH2", "FAF1", "FAM123B", "FAM175A", "FAM46C", "FANCA", 
"FANCC", "FANCD2", "FANCE", "FANCF", "FANCG", "FANCI", "FANCL", 
"FANCM", "FAS", "FAT1", "FAT3", "FBXO11", "FBXO31", "FBXW7", 
"FGF10", "FGF12", "FGF14", "FGF19", "FGF23", "FGF3", "FGF4", 
"FGF6", "FGF7", "FGFR1", "FGFR2", "FGFR3", "FGFR4", "FH", "FHIT", 
"FLCN", "FLT1", "FLT3", "FLT4", "FLYWCH1", "FOXA1", "FOXL2", 
"FOXO1", "FOXO3", "FOXP1", "FRS2", "FUBP1", "FYN", "GADD45B", 
"GATA1", "GATA2", "GATA3", "GLI1", "GNA11", "GNA12", "GNA13", 
"GNAQ", "GNAS", "GPR124", "GREM1", "GRIN2A", "GSK3B", "GTSE1", 
"H3F3C", "HDAC1", "HDAC4", "HDAC7", "HGF", "HIF1A", "HIST1H1C", 
"HIST1H1D", "HIST1H1E", "HIST1H2AC", "HIST1H2AG", "HIST1H2AL", 
"HIST1H2AM", "HIST1H2BC", "HIST1H2BD", "HIST1H2BJ", "HIST1H2BK", 
"HIST1H2BO", "HIST1H3B", "HLA-A", "HLA-B", "HMGA2", "HNF1A", 
"HRAS", "HSP90AA1", "ICK", "ICOSLG", "ID3", "IDH1", "IDH2", "IFNGR1", 
"IGF1", "IGF1R", "IGF2", "IKBKE", "IKZF1", "IKZF2", "IKZF3", 
"IL10", "IL7R", "INHBA", "INPP4A", "INPP4B", "INPP5D", "INSR", 
"IRF1", "IRF4", "IRF8", "IRS1", "IRS2", "JAK1", "JAK2", "JAK3", 
"JARID2", "JUN", "KDM2B", "KDM4C", "KDM5A", "KDM5C", "KDM6A", 
"KDR", "KEAP1", "KIT", "KLF4", "KLHL6", "KRAS", "LATS1", "LATS2", 
"LEF1", "LFNG", "LMO1", "LRP1B", "LRRK2", "MAF", "MAFB", "MAGED1", 
"MALT1", "MAP2K1", "MAP2K2", "MAP2K4", "MAP3K1", "MAP3K13", "MAP3K14", 
"MAP3K6", "MAP3K7", "MAPK1", "MAX", "MCL1", "MDC1", "MDM2", "MDM4", 
"MED12", "MEF2B", "MEF2C", "MEN1", "MET", "MIB1", "MIR17HG", 
"MITF", "MKI67", "MLH1", "MLL", "MLL2", "MLL3", "MMSET", "MPL", 
"MRE11A", "MSH2", "MSH3", "MSH6", "MSI1", "MSI2", "MTAP", "MTOR", 
"MUTYH", "MYB", "MYC", "MYCL1", "MYCN", "MYD88", "MYO18A", "MYOD1", 
"MYST3", "NBN", "NCOR1", "NCOR2", "NCSTN", "nds_chrY_14891426", 
"nds_chrY_2655223", "NF1", "NF2", "NFE2L2", "NFKB1", "NFKB2", 
"NFKBIA", "NKX2-1", "NKX3-1", "NME1", "NOD1", "NOTCH1", "NOTCH2", 
"NOTCH3", "NOTCH4", "NPM1", "NRAS", "NSD1", "NT5C2", "NTRK1", 
"NTRK2", "NTRK3", "NUP93", "NUP98", "NUT_C15orf55", "P2RY8", 
"PAG1", "PAK1", "PAK3", "PAK7", "PALB2", "PARK2", "PARP1", "PARP2", 
"PARP3", "PARP4", "PASK", "PAX5", "PAX8", "PBRM1", "PC", "PCBP1", 
"PCLO", "PDCD1", "PDCD11", "PDCD1LG2", "PDGFRA", "PDGFRB", "PDK1", 
"PDPK1", "PDS5B", "PHF6", "PHOX2B", "PIK3C2G", "PIK3C3", "PIK3CA", 
"PIK3CB", "PIK3CD", "PIK3CG", "PIK3R1", "PIK3R2", "PIK3R3", "PIM1", 
"PLCG2", "PLK2", "PMAIP1", "PMS1", "PMS2", "PNRC1", "POFUT1", 
"POGLUT1", "POLE", "POT1", "PPP2R1A", "PRDM1", "PRKAR1A", "PRKDC", 
"PRSS8", "PTCH1", "PTEN", "PTPN11", "PTPN2", "PTPN6", "PTPRD", 
"PTPRO", "PTPRS", "PTPRT", "RAC1", "RAD21", "RAD50", "RAD51", 
"RAD51C", "RAD51L1", "RAD51L3", "RAD52", "RAD54L", "RAF1", "RARA", 
"RASA1", "RASGEF1A", "RB1", "RBM10", "RECQL4", "REL", "RELN", 
"RET", "RFWD2", "RHOA", "RHOH", "RICTOR", "RIT1", "RNF43", "ROS", 
"ROS_ROS1", "ROS1", "RPA1", "RPL11", "RPL13", "RPL15", "RPL35A", 
"RPS14", "RPS19", "RPS26", "RPS6KA4", "RPS6KB2", "RPTOR", "rs1000232", 
"rs10004114", "rs10021694", "rs1003179", "rs10067046", "rs10078699", 
"rs1008365", "rs10094320", "rs10097828", "rs10102929", "rs10106085", 
"rs1010745", "rs10123304", "rs10140137", "rs10142753", "rs10150202", 
"rs10155303", "rs10159658", "rs10162861", "rs10174397", "rs10187908", 
"rs1018794", "rs10203366", "rs10226944", "rs10230672", "rs10231971", 
"rs10233848", "rs10253228", "rs1025743", "rs1026257", "rs1026504", 
"rs1031914", "rs1033035", "rs1039057", "rs1040636", "rs10418925", 
"rs10451916", "rs10483863", "rs10488329", "rs10489673", "rs10503249", 
"rs10503610", "rs10512324", "rs10513286", "rs10516507", "rs10520660", 
"rs10732921", "rs10734436", "rs10740230", "rs10741747", "rs10747645", 
"rs10749377", "rs10749957", "rs10753110", "rs1075547", "rs10770329", 
"rs10770338", "rs10772690", "rs10775377", "rs10775524", "rs10778450", 
"rs10783020", "rs10783669", "rs10784009", "rs1078402", "rs10788835", 
"rs10799615", "rs10800447", "rs10800595", "rs10802883", "rs10804160", 
"rs10807969", "rs10811085", "rs10812896", "rs10814848", "rs10818929", 
"rs10827368", "rs10832446", "rs10833939", "rs10841451", "rs10845372", 
"rs10849154", "rs10859505", "rs10859508", "rs10861667", "rs10865315", 
"rs10865869", "rs10872905", "rs10873015", "rs10878697", "rs10880923", 
"rs10891259", "rs10894489", "rs10897315", "rs10899035", "rs10901638", 
"rs10903464", "rs10908943", "rs10911580", "rs10919044", "rs10924340", 
"rs10972381", "rs11035554", "rs11068469", "rs11071752", "rs11073424", 
"rs11076631", "rs11077530", "rs11078015", "rs11079395", "rs11080015", 
"rs11081772", "rs11082713", "rs11088585", "rs11091840", "rs11096687", 
"rs11103182", "rs11103233", "rs11105839", "rs11126691", "rs11128848", 
"rs11129341", "rs11129353", "rs11129993", "rs11150134", "rs11151937", 
"rs11156730", "rs11162256", "rs11163482", "rs11174442", "rs11188870", 
"rs11208766", "rs1124493", "rs11247079", "rs11250159", "rs1148210", 
"rs1153600", "rs1156784", "rs11588419", "rs11598212", "rs11601693", 
"rs11612664", "rs11638468", "rs11639134", "rs11651129", "rs1165155", 
"rs11660099", "rs11663406", "rs11669449", "rs11677053", "rs11694644", 
"rs11697412", "rs11698886", "rs11712937", "rs11723866", "rs11731332", 
"rs11735128", "rs11740475", "rs11743261", "rs11747505", "rs11753185", 
"rs11761839", "rs11771692", "rs11773913", "rs11777002", "rs1178359", 
"rs1178370", "rs11815325", "rs11850859", "rs11857176", "rs11862384", 
"rs11887586", "rs11888592", "rs11890408", "rs11891934", "rs11897624", 
"rs11898345", "rs11902458", "rs11922110", "rs11938446", "rs11938551", 
"rs11949556", "rs11952711", "rs11955354", "rs11960788", "rs11984766", 
"rs11991912", "rs11998282", "rs1203659", "rs120434", "rs12136079", 
"rs12233855", "rs12403323", "rs12408981", "rs12437942", "rs12438635", 
"rs12441013", "rs12442455", "rs12463443", "rs12464446", "rs12465102", 
"rs12465204", "rs12477610", "rs12493081", "rs12497253", "rs12497518", 
"rs12500111", "rs12502722", "rs12503437", "rs12513797", "rs12516563", 
"rs12517769", "rs12530182", "rs12533939", "rs1253820", "rs12541274", 
"rs12543849", "rs12544387", "rs12547059", "rs12549299", "rs12551126", 
"rs12593415", "rs12603034", "rs12605566", "rs12610868", "rs12612024", 
"rs12616371", "rs12641943", "rs12643470", "rs12649647", "rs12655584", 
"rs12657445", "rs12659148", "rs12667043", "rs12667380", "rs12668848", 
"rs12676327", "rs12678397", "rs12684797", "rs12687359", "rs1319954", 
"rs1323862", "rs132410", "rs1327583", "rs1329680", "rs1331624", 
"rs133216", "rs1335143", "rs1338519", "rs1339474", "rs1339673", 
"rs1341438", "rs1341675", "rs1342813", "rs1346820", "rs1353461", 
"rs1363046", "rs1365294", "rs1365740", "rs1384623", "rs1387024", 
"rs1395907", "rs1402473", "rs1406084", "rs1411289", "rs1412227", 
"rs1417922", "rs1420175", "rs1420518", "rs1428642", "rs1429369", 
"rs1430230", "rs1430526", "rs1430564", "rs1432154", "rs1434199", 
"rs1441136", "rs1453703", "rs1456078", "rs1458590", "rs1459551", 
"rs1464413", "rs1474372", "rs1479996", "rs1481156", "rs1485315", 
"rs1488049", "rs1488745", "rs1491868", "rs1494188", "rs1495525", 
"rs1504081", "rs1505696", "rs1517662", "rs151796", "rs1521696", 
"rs1524358", "rs1527062", "rs1527879", "rs1528317", "rs1529968", 
"rs1530562", "rs1534520", "rs1535501", "rs1536766", "rs1540186", 
"rs154989", "rs1551634", "rs1554936", "rs155864", "rs156095", 
"rs1569836", "rs1570932", "rs1582317", "rs159734", "rs1624525", 
"rs1637497", "rs1648306", "rs165583", "rs1669694", "rs167510", 
"rs1691921", "rs16980598", "rs1718052", "rs1725235", "rs17260872", 
"rs1736839", "rs174211", "rs1744266", "rs1780615", "rs1793609", 
"rs1807193", "rs1814089", "rs181652", "rs1822929", "rs1829065", 
"rs1834311", "rs1861242", "rs1863384", "rs1866136", "rs1867437", 
"rs1867718", "rs1883278", "rs1885615", "rs1886152", "rs1886551", 
"rs1888171", "rs1898917", "rs1904925", "rs1916800", "rs1918319", 
"rs1918939", "rs1926005", "rs1931656", "rs1939508", "rs1944463", 
"rs194609", "rs1946940", "rs1947060", "rs1950605", "rs1951073", 
"rs1959730", "rs1977501", "rs1979260", "rs1981539", "rs1982909", 
"rs1993032", "rs199347", "rs1995337", "rs1996770", "rs1999923", 
"rs200630", "rs2024937", "rs2030040", "rs2030171", "rs2030328", 
"rs2034347", "rs2042429", "rs2046475", "rs2048245", "rs2049438", 
"rs2051068", "rs2052532", "rs2053530", "rs2058043", "rs2071317", 
"rs2071394", "rs2077055", "rs2097657", "rs210219", "rs2110540", 
"rs2114239", "rs2121863", "rs2129907", "rs2135845", "rs2140849", 
"rs2151554", "rs2160043", "rs2167128", "rs2168454", "rs217256", 
"rs2182161", "rs2189930", "rs2192804", "rs2193624", "rs2208952", 
"rs2209388", "rs2210034", "rs2215195", "rs2219088", "rs2225832", 
"rs2235438", "rs2235646", "rs2238749", "rs2241191", "rs2243566", 
"rs2244353", "rs2246693", "rs2257468", "rs2257540", "rs2257766", 
"rs226111", "rs2266802", "rs2267600", "rs2269349", "rs2269615", 
"rs2279020", "rs2283170", "rs2285615", "rs2286593", "rs2286645", 
"rs2287434", "rs2290932", "rs2291313", "rs2297081", "rs2303748", 
"rs2325020", "rs2331207", "rs233621", "rs2347790", "rs2355654", 
"rs2373852", "rs2380943", "rs2397660", "rs2417982", "rs2419304", 
"rs2419407", "rs2433651", "rs2437446", "rs2442496", "rs2488375", 
"rs2494971", "rs2503069", "rs2505257", "rs2519713", "rs252234", 
"rs2528611", "rs2535310", "rs2548632", "rs2553794", "rs2555605", 
"rs2560373", "rs2568201", "rs2571219", "rs2594953", "rs2596902", 
"rs2602822", "rs2619062", "rs2619118", "rs2632032", "rs263240", 
"rs2634103", "rs2646218", "rs2649712", "rs265005", "rs2656176", 
"rs2679665", "rs2679748", "rs2683544", "rs2684222", "rs2691669", 
"rs2704476", "rs270487", "rs2711419", "rs2729544", "rs273172", 
"rs273628", "rs2757027", "rs2758629", "rs2765909", "rs2800485", 
"rs2814992", "rs2815118", "rs2821143", "rs2825168", "rs283044", 
"rs2832533", "rs2835302", "rs2847348", "rs2853252", "rs2889190", 
"rs2900628", "rs2906231", "rs2907603", "rs2908977", "rs2917728", 
"rs291783", "rs29193", "rs2924105", "rs2932532", "rs2934813", 
"rs2948998", "rs2959019", "rs2964101", "rs2964131", "rs2977794", 
"rs29798", "rs2989871", "rs2999357", "rs300152", "rs3024560", 
"rs303387", "rs306288", "rs31042", "rs3130827", "rs318416", "rs321176", 
"rs327677", "rs329321", "rs33053", "rs337259", "rs360093", "rs360956", 
"rs362646", "rs362813", "rs3743294", "rs3756208", "rs3769210", 
"rs3770530", "rs3778984", "rs3782553", "rs3783816", "rs3784429", 
"rs3784621", "rs3790428", "rs3800282", "rs380155", "rs3802268", 
"rs3803234", "rs3807634", "rs3817725", "rs3818270", "rs3854", 
"rs3910736", "rs3910910", "rs3914871", "rs3922856", "rs3924787", 
"rs3960412", "rs402541", "rs40644", "rs4072488", "rs4101647", 
"rs410262", "rs412510", "rs4129288", "rs4144092", "rs420101", 
"rs4236296", "rs423686", "rs4239922", "rs4241875", "rs4242095", 
"rs4258627", "rs429969", "rs430807", "rs4308333", "rs4318636", 
"rs4327753", "rs4329843", "rs435202", "rs4356470", "rs4370849", 
"rs4371464", "rs4398066", "rs4411464", "rs4427485", "rs4441710", 
"rs4451831", "rs4481591", "rs4490567", "rs4491357", "rs4495891", 
"rs450818", "rs4509669", "rs4526421", "rs4536526", "rs4536875", 
"rs454312", "rs4567809", "rs4572435", "rs4591585", "rs4608730", 
"rs4610575", "rs4617924", "rs4625043", "rs4643323", "rs464500", 
"rs4655036", "rs4656809", "rs4671714", "rs4672233", "rs4673024", 
"rs4673238", "rs4677451", "rs4678951", "rs4679739", "rs4700308", 
"rs4711819", "rs4716167", "rs4726088", "rs4732165", "rs4736186", 
"rs4737959", "rs4740054", "rs4741818", "rs4747886", "rs4751376", 
"rs4755815", "rs4756547", "rs475865", "rs4786416", "rs4789110", 
"rs4789882", "rs4795799", "rs4797070", "rs4805885", "rs4811719", 
"rs4821995", "rs4825872", "rs4858795", "rs485988", "rs4860174", 
"rs4888839", "rs4889606", "rs4892924", "rs4895085", "rs4900589", 
"rs4901224", "rs4916667", "rs4937385", "rs4941979", "rs4945074", 
"rs4948924", "rs4949254", "rs4950346", "rs4953086", "rs4953445", 
"rs4958301", "rs4960181", "rs4961234", "rs4961670", "rs4962305", 
"rs4964049", "rs4966012", "rs4976264", "rs4978425", "rs5008177", 
"rs5015420", "rs527697", "rs529821", "rs533209", "rs533571", 
"rs534817", "rs564201", "rs567695", "rs5749122", "rs576762", 
"rs578974", "rs579172", "rs588231", "rs5907921", "rs5919974", 
"rs5925700", "rs5933384", "rs5935490", "rs5936531", "rs595289", 
"rs595496", "rs5978265", "rs5986635", "rs5998941", "rs6006984", 
"rs6015534", "rs602017", "rs6051490", "rs6055187", "rs6061867", 
"rs6073049", "rs6079138", "rs6080154", "rs6085786", "rs6095681", 
"rs6100460", "rs610329", "rs6113024", "rs611313", "rs6129804", 
"rs6135517", "rs6136727", "rs614516", "rs615382", "rs630706", 
"rs636909", "rs642189", "rs6426446", "rs6437337", "rs6438074", 
"rs6438822", "rs6447494", "rs644818", "rs6451151", "rs6455233", 
"rs6460897", "rs6461264", "rs6463033", "rs6463347", "rs6469932", 
"rs6475473", "rs6486088", "rs6491430", "rs6492866", "rs650129", 
"rs6505356", "rs6523931", "rs6534936", "rs6540406", "rs6547651", 
"rs6560798", "rs6562039", "rs6567221", "rs6573105", "rs6573142", 
"rs6574581", "rs6575809", "rs6584424", "rs6586235", "rs6592302", 
"rs6600227", "rs6627327", "rs6645103", "rs666003", "rs6665591", 
"rs6669078", "rs6695355", "rs6698595", "rs6710671", "rs6712597", 
"rs6716343", "rs6719473", "rs6720783", "rs6750530", "rs6759683", 
"rs6774940", "rs6783710", "rs6785046", "rs6800661", "rs6806089", 
"rs6806344", "rs681665", "rs6821055", "rs6823304", "rs6830887", 
"rs6831412", "rs6837139", "rs6837853", "rs6848521", "rs6858568", 
"rs6860330", "rs6861399", "rs6864687", "rs6885929", "rs6889147", 
"rs690343", "rs6909545", "rs6912317", "rs6919109", "rs6919748", 
"rs6921880", "rs6925590", "rs6933379", "rs6946785", "rs6947782", 
"rs6953684", "rs6964140", "rs6982161", "rs6982849", "rs6982872", 
"rs6984966", "rs6990155", "rs699576", "rs7006706", "rs701286", 
"rs7015970", "rs7017910", "rs7029585", "rs7030167", "rs7030379", 
"rs7031280", "rs7038447", "rs7057233", "rs7071984", "rs7084555", 
"rs7087038", "rs7093997", "rs7095623", "rs7097856", "rs7120118", 
"rs7127862", "rs712878", "rs7129619", "rs7134781", "rs713644", 
"rs7141943", "rs714393", "rs7152377", "rs715332", "rs7161761", 
"rs716655", "rs7173371", "rs7184753", "rs719914", "rs7205084", 
"rs720566", "rs7215861", "rs722135", "rs7231097", "rs7241627", 
"rs7247429", "rs7250431", "rs7252682", "rs725888", "rs7260408", 
"rs728173", "rs7297243", "rs7298878", "rs7309492", "rs7313402", 
"rs7314445", "rs733428", "rs734545", "rs7349119", "rs7370446", 
"rs738156", "rs7382429", "rs738686", "rs742131", "rs743241", 
"rs7437388", "rs744027", "rs7443990", "rs744876", "rs7448848", 
"rs746018", "rs7463591", "rs746493", "rs747888", "rs7483889", 
"rs751193", "rs7525210", "rs753795", "rs7540116", "rs754332", 
"rs7545468", "rs7554201", "rs756691", "rs7567463", "rs757996", 
"rs7587559", "rs7597093", "rs7597374", "rs7597437", "rs7598657", 
"rs7604226", "rs7608771", "rs7620308", "rs7620623", "rs7630344", 
"rs764605", "rs7648196", "rs7656912", "rs7659727", "rs766146", 
"rs768759", "rs7701748", "rs7709242", "rs7711007", "rs7720376", 
"rs7724913", "rs772515", "rs7727036", "rs7728579", "rs7730813", 
"rs7762582", "rs777107", "rs7774405", "rs7777622", "rs7790008", 
"rs7791589", "rs780930", "rs7814311", "rs781845", "rs7820557", 
"rs782097", "rs7833175", "rs7840384", "rs7857300", "rs7875086", 
"rs788433", "rs7907882", "rs791673", "rs7918280", "rs7928594", 
"rs7937418", "rs7943757", "rs7949634", "rs7955513", "rs7956788", 
"rs7968048", "rs7981238", "rs7990641", "rs7995811", "rs7999126", 
"rs8002817", "rs8008989", "rs8015905", "rs8021311", "rs8025445", 
"rs802774", "rs8036299", "rs8057500", "rs8082959", "rs8089950", 
"rs8096468", "rs8109968", "rs8129186", "rs8181996", "rs8190780", 
"rs839543", "rs845215", "rs847901", "rs858891", "rs869357", "rs879835", 
"rs880922", "rs880963", "rs884303", "rs889029", "rs889362", "rs902780", 
"rs907360", "rs912505", "rs916424", "rs916732", "rs918595", "rs919694", 
"rs919723", "rs920712", "rs921931", "rs9266791", "rs9289628", 
"rs929019", "rs9292261", "rs9296469", "rs9303259", "rs931996", 
"rs9321767", "rs9322521", "rs9326264", "rs9341911", "rs9348239", 
"rs9356708", "rs9366232", "rs9376390", "rs9387216", "rs9401144", 
"rs9410830", "rs9443638", "rs9459057", "rs947206", "rs948330", 
"rs951491", "rs9525166", "rs952557", "rs9526114", "rs9531007", 
"rs953593", "rs9538560", "rs953958", "rs955813", "rs9573824", 
"rs9584005", "rs958738", "rs959897", "rs961996", "rs9628100", 
"rs9635539", "rs9648980", "rs966433", "rs968971", "rs969230", 
"rs9736791", "rs981181", "rs9824480", "rs9839541", "rs9840640", 
"rs9841791", "rs9842109", "rs9846213", "rs9853460", "rs9853621", 
"rs9857154", "rs9862153", "rs9864293", "rs9906591", "rs9927800", 
"rs9928312", "rs994244", "rs9955288", "rs996407", "rs9968294", 
"rs9992579", "RUNX1", "RUNX1T1", "RYBP", "S1PR2", "SBDS", "SDHA", 
"SDHAF2", "SDHB", "SDHC", "SDHD", "SERP2", "SETBP1", "SETD2", 
"SF3B1", "SGK1", "SH2B3", "SH2D1A", "SHQ1", "SMAD2", "SMAD3", 
"SMAD4", "SMARCA1", "SMARCA4", "SMARCB1", "SMARCD1", "SMC1A", 
"SMC3", "SMO", "SOCS1", "SOCS2", "SOCS3", "SOX10", "SOX17", "SOX2", 
"SOX9", "SPEN", "SPOP", "SRC", "SRSF2", "STAG1", "STAG2", "STAT3", 
"STAT4", "STAT5A", "STAT5B", "STAT6", "STK11", "STK40", "SUFU", 
"SUZ12", "SYK", "TAF1", "TBL1XR1", "TBX3", "TCF3", "TCL1A", "TERT", 
"TET1", "TET2", "TET3", "TGFBR1", "TGFBR2", "TIPARP", "TLL2", 
"TMEM127", "TMEM30A", "TMPRSS2", "TMSL3", "TNFAIP3", "TNFRSF11A", 
"TNFRSF14", "TNFRSF17", "TNFSF9", "TOP1", "TOX", "TP53", "TP63", 
"TRAF2", "TRAF3", "TRAF5", "TRAF7", "TRRAP", "TSC1", "TSC2", 
"TSHR", "TUSC3", "TYK2", "U2AF1", "U2AF2", "VHL", "VTCN1", "WDR90", 
"WHSC1", "WISP3", "WT1", "WWOX", "XBP1", "XIAP", "XPO1", "XRCC3", 
"YAP1", "YES1", "YY1AP1", "ZMYM3", "ZNF217", "ZNF24", "ZNF703", 
"ZRSR2"), class = "factor")), .Names = c("Chromosome", "Start_Position", 
"End_Position", "Hugo_Symbol"), row.names = 205:220, class = "data.frame")

2 个答案:

答案 0 :(得分:2)

GenomicRanges包即将成为你最好的朋友:它提供基于范围的函数和生物语义,以解决你所描述的那种问题。

不必将每个核苷酸位置各表示为一行(效率低),而是将整个区间(包括染色体和链)表示为一个 GRanges 对象:

library(GenomicRanges)

# Collapse the per-position rows of mydata into one unstranded interval on
# chr1 that spans the smallest Start_Position to the largest End_Position,
# carrying the gene symbol as an extra column.
gr <- GRanges(
  seqnames = "chr1",
  ranges = with(
    mydata,
    IRanges(start = min(Start_Position), end = max(End_Position))
  ),
  strand = "*",
  symbol = "CSF3R"
)

有了这个对象,回答任何与区间重叠相关的问题都很简单。例如,列出与您感兴趣的基因重叠的所有已知转录本:

library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Keep only the known transcripts that overlap gr. The range to overlap is
# passed positionally because the name of that argument differs between
# Bioconductor releases (`subject` in older ones, `ranges` in current ones).
subsetByOverlaps(transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene), gr)

## GRanges object with 3 ranges and 2 metadata columns:
##  seqnames               ranges strand |     tx_id     tx_name
##     <Rle>            <IRanges>  <Rle> | <integer> <character>
## [1]     chr1 [36931644, 36948915]      - |      4997  uc001cav.2
## [2]     chr1 [36931644, 36948915]      - |      4998  uc001caw.2
## [3]     chr1 [36931644, 36948915]      - |      4999  uc001cax.2
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome

答案 1 :(得分:0)

您可以尝试使用 apply 系列函数:

# Flag each row of mydata whose Start_Position falls inside a Hemebed interval
# that has the same Chromosome and Hugo_Symbol. The result is a logical vector
# with one element per row of mydata, usable as a row mask.
#
# Columns are read by name from the data frame itself: apply() would first
# convert mydata to a character matrix, which turns the position checks into
# string comparisons. Chromosome and symbol are compared as character strings
# because the two data frames store them with different types or level sets.
vapply(seq_len(nrow(mydata)), function(i) {
  same_region <-
    as.character(mydata$Chromosome[i]) == as.character(Hemebed$Chromosome) &
    as.character(mydata$Hugo_Symbol[i]) == as.character(Hemebed$Hugo_Symbol)
  inside <- mydata$Start_Position[i] >= Hemebed$Start_Position &
    mydata$Start_Position[i] <= Hemebed$End_Position
  any(same_region & inside)
}, logical(1))