有没有一种方法可以优化此语句:从向量中选择值

时间:2020-08-17 13:28:58

标签: r dplyr data.table tidyr

我有下面这段代码。由于 length(nodes_data_frame_name) 实际上约为 4398886(而且可能更大),并且我必须反复执行此操作,运行时间会不断累积。有没有办法加快速度?我已经使用了基于 library("fastmatch") 的如下方法:
#' Membership test implemented with `fastmatch::fmatch()`
#'
#' Works like `%in%`: reports, for each element of `x`, whether it was found
#' in `table`.
#'
#' @param x Values to look up.
#' @param table Values to match against.
#' @return A logical vector with one element per element of `x`; `TRUE` where
#'   `fastmatch::fmatch()` returned a match position, `FALSE` otherwise.
`%fin%` <- function(x, table) {
  # requireNamespace() only checks that the package can be loaded; unlike
  # require() it does not attach fastmatch to the search path on every call.
  if (!requireNamespace("fastmatch", quietly = TRUE)) {
    stop("Package 'fastmatch' is required for %fin%.", call. = FALSE)
  }
  # nomatch = 0L turns "not found" into 0, so "> 0L" yields TRUE/FALSE, never NA.
  fastmatch::fmatch(x, table, nomatch = 0L) > 0L
}


library("fastmatch")

        # Example node ids, stored as character strings. This is only a small
        # sample; per the question, the real vector has roughly 4.4 million
        # elements.
        nodes_data_frame_name<-c("258829929", "703216920", "6644171982", "251966313", "3931934622", 
                             "253309719", "2466351225", "1143847424", "3805799829", "3653247881", 
                             "3706587621", "4093399476", "347326202", "305944935", "10848169", 
                             "2892869997", "3571191294", "266877902", "4140372707", "309271729", 
                             "1039396062", "574814899", "3155960859", "833342139", "4748559933", 
                             "3810929288", "340350756", "267395419", "5268703124", "3334712997", 
                             "1863072113", "2685248487", "5329947980", "408944338", "4322827361", 
                             "4374244127", "301058607", "456849376", "265681063", "292167759", 
                             "566748339", "1713229944", "482768671", "6645842544", "321659162", 
                             "3992646921", "30434110", "2115024856", "69168601", "60754307", 
                             "7425715510", "295582750", "294248157", "461987391", "336673995", 
                             "5605013141", "4227140706", "3876476839", "3622727879", "1271918831", 
                             "1449305359", "1669317626", "3751009335", "304451012", "365497650", 
                             "4111192", "4111194", "4111580", "4111585", "4111601", "4112927", "4112937", 
                             "6919612351", "60690239", "467148800", "4516714791", "1435448741", 
                             "298851711", "2979008487", "5091185609", "246826471", "3412445144", 
                             "83654266", "454479554", "59924822", "1107875524", "501591106", 
                             "492754851", "967133877", "4977383917", "2249188589", "3722077419", 
                             "475411253", "410759666", "1869135092", "258875186", "441314988", 
                             "336564806", "3056214519", "319522225", "1889169860", "3045918127"
    )
    
        # Example ids to select, stored as numbers (doubles), unlike the node
        # ids above, which are character strings.
        subset_ids_test<-c(4055282, 4055303, 4055305, 4055312, 4055315, 4055318, 4086700, 
          4086701, 4086702, 4086703, 4086704, 4086705, 4086706, 4086707, 
          4086708, 4086709, 4086710, 4086719, 4086720, 4086721, 4086722, 
          4088769, 4088946, 4088948, 4091853, 4091854, 4091863, 4091865, 
          4091869, 4091873, 4091874, 4091879, 4091880, 4091882, 4091883, 
          4091884, 4091886, 4096404, 4097511, 4097522, 4097523, 4097524, 
          4097525, 4097526, 4097528, 4097529, 4097530, 4097531, 4097532, 
          4097533, 4097534, 4098001, 4098002, 4098698, 4098700, 4098911, 
          4098912, 4098913, 4110806, 4110809, 4110810, 4110811, 4111190, 
          4111192, 4111194, 4111580, 4111585, 4111601, 4112927, 4112937, 
          4112944, 4112948, 4112956, 4112964, 4112966, 4112969, 4112972, 
          4112977, 4112978, 4112982, 4112986, 4115362, 4115371, 4115377, 
          4115378, 4115384, 4115386, 4115392, 4115393, 4115397, 4116185, 
          4116351, 4116353, 4116355, 4116356, 4116362, 4116374, 4116518, 
          4162233, 4162234)
    
    
    # Parse every node id as a number and flag the ones that also occur in
    # subset_ids_test; the mask has one TRUE/FALSE per node id.
    subset_index_test <- `%fin%`(
      as.numeric(nodes_data_frame_name),
      subset_ids_test
    )
    # Keep the flagged node ids in their original character form.
    vids <- nodes_data_frame_name[which(subset_index_test)]

 

1 个答案:

答案 0 :(得分:1)

这里是一种选择:

library(data.table)
# Convert the numeric ids to strings instead of parsing every node id as a
# number. format(scientific = FALSE) is used rather than as.character()
# because as.character() renders some doubles in scientific notation
# (e.g. 4e6 becomes "4e+06"), which would never match a plain-digit node id.
# Assumes subset_ids_test holds whole-number, non-missing ids.
# %chin% yields TRUE/FALSE only, so the mask can index directly without which().
nodes_data_frame_name[
  nodes_data_frame_name %chin%
    format(subset_ids_test, scientific = FALSE, trim = TRUE)
]
# "4111192" "4111194" "4111580" "4111585" "4111601" "4112927" "4112937"

如果向量中没有重复值,您还可以尝试下面这种写法,我想它可能会更快:

# Look up each subset id's position in the node ids; nomatch = 0L makes ids
# that are not found drop out of the subsetting. Results come back in the
# order of subset_ids_test, and only one position per id is returned, hence
# the "no duplicates" caveat.
# format(scientific = FALSE) replaces as.character(), which renders some
# doubles in scientific notation (e.g. 4e6 becomes "4e+06") and would then
# miss the plain-digit node ids. Assumes whole-number, non-missing ids.
nodes_data_frame_name[
  chmatch(
    format(subset_ids_test, scientific = FALSE, trim = TRUE),
    nodes_data_frame_name,
    nomatch = 0L
  )
]