如何使用dplyr在数据框中以"."作为分隔符拆分字符串

时间:2017-06-08 04:13:56

标签: r dplyr

我有以下数据框:

# Reproducible example data in dput() form: a one-column data frame whose
# `united_sample_names` column is a factor with 101 levels. The integer codes
# 92:101 select the last ten levels (the "Vehicle_iv..." samples KK_86 through
# KK_99), so the data frame has 10 rows. Each label is made of three
# dot-separated parts: <group>.<run id>.<sample id>.
df <- structure(list(united_sample_names = structure(92:101, .Label = c("1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_208", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_209", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_210", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_211", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_212", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_213", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_214", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_215", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_216", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_218", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_172", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_173", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_174", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_175", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_176", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_177", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_178", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_179", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_180", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_181", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_182", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_183", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_184", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_185", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_186", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_187", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_188", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_189", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_190", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_191", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_192", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_193", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_194", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_195", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_196", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_197", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_198", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_199", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_200", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_201", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_202", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_203", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_359", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_360", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_361", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_362", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_363", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_364", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_365", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_366", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_367", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_368", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_369", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_370", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_371", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_372", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_373", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_374", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_375", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_376", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_377", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_378", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_379", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_381", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_382", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_383", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_100", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_101", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_102", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_103", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_104", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_106", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_107", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_109", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_110", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_111", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_112", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_113", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_114", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_115", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_116", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_117", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_118", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_119", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_120", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_122", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_124", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_125", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_126", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_127", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_128", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_86", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_87", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_88", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_89", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_90", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_92", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_93", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_95", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_97", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_99"
), class = "factor")), .Names = "united_sample_names", row.names = c(NA, 
10L), class = "data.frame")

# Print the data frame to show the ten rows that will be split.
df 
#>                                 united_sample_names
#> 1  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_86
#> 2  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_87
#> 3  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_88
#> 4  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_89
#> 5  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_90
#> 6  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_92
#> 7  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_93
#> 8  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_95
#> 9  Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_97
#> 10 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_99

我想要做的是将其拆分为:

   header1    header2                         header3                         
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_86
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_87
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_88
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_89
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_90
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_92
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_93
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_95
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_97
   Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_99

我尝试了下面的代码,但没有得到想要的结果:

> df %>% separate(united_sample_names, c("header1","header2","header3"))
   header1 header2 header3
1  Vehicle      iv  170414
2  Vehicle      iv  170414
3  Vehicle      iv  170414
4  Vehicle      iv  170414
5  Vehicle      iv  170414
6  Vehicle      iv  170414
7  Vehicle      iv  170414
8  Vehicle      iv  170414
9  Vehicle      iv  170414
10 Vehicle      iv  170414
Warning message:
Too many values at 10 locations: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 

正确的做法是什么?

1 个答案:

答案 0 :(得分:1)

我们需要显式指定sep。否则,separate()默认会在任意非字母数字字符处拆分(这里既包括.也包括_),因此列会在错误的位置被分开。

library(tidyr)

# Split `united_sample_names` into three columns at literal dots only.
# "[.]" is a regex character class that matches a literal ".", so the
# underscores inside each part are left intact.
df %>%
  separate(
    united_sample_names,
    into = c("header1", "header2", "header3"),
    sep = "[.]"
  )
#      header1                         header2 header3
#1  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_86
#2  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_87
#3  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_88
#4  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_89
#5  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_90
#6  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_92
#7  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_93
#8  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_95
#9  Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_97
#10 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2   KK_99