使用readLines读取文件后,如何提取与给定字符串匹配的数据子集

时间:2014-10-28 06:30:21

标签: r

使用readLines读入文件后遇到了问题。我希望从读入的数据中提取出与给定字符串匹配的那部分子集。

dput(data[1:31]) ## lines 1 to 31 of `data`

   c("; Tue, 28 Oct 2014, 10:52", "", "; original file:          H:\\Rubber_join\\part_1\\part_1.txt", 
"; no sorting", "", "name = Pop_Rubber", "popt = CP", "nloc = 5000", 
"nind = 149", "", "448814085_2906       <lmxll> ; 1", "   ll ll ll ll --  ll -- lm lm lm  -- lm ll lm lm  lm lm lm ll lm  -- ll ll ll lm", 
"   lm ll lm ll lm  ll lm lm lm lm  lm lm -- lm ll  lm lm ll lm ll  ll lm lm lm lm", 
"   ll lm lm ll ll  lm ll ll ll ll  lm ll lm lm lm  ll -- lm ll lm  -- ll ll lm --", 
"   ll ll -- lm ll  ll lm ll lm ll  ll lm -- ll ll  ll ll lm lm ll  ll lm lm -- lm", 
"   ll -- lm lm lm  ll lm ll lm lm  -- lm ll ll ll  -- lm lm ll ll  ll lm ll lm lm", 
"   ll lm ll ll lm  ll ll ll -- ll  ll ll ll ll ll  ll ll -- ll lm  ll lm ll ll", 
"448814085_3447       <lmxll> ; 2", "   ll ll ll ll --  ll lm -- lm --  lm lm ll lm lm  lm lm lm ll --  lm ll ll ll --", 
"   lm ll lm ll lm  ll lm lm lm lm  lm lm lm lm ll  lm lm ll lm ll  ll lm lm lm --", 
"   ll lm lm ll ll  lm ll ll ll ll  -- ll -- -- lm  ll lm lm ll lm  -- ll ll lm lm", 
"   ll ll -- lm ll  ll lm ll lm ll  ll -- lm ll --  ll ll lm lm ll  ll lm -- lm lm", 
"   ll lm lm lm lm  ll lm ll lm lm  -- lm ll ll ll  -- lm lm ll ll  ll lm ll lm --", 
"   ll lm ll ll lm  ll ll -- lm ll  ll ll ll ll ll  ll ll lm ll lm  ll lm ll ll", 
"448814085_3491       <lmxll> ; 3", "   ll ll ll ll --  ll lm lm lm lm  lm lm ll lm lm  lm lm lm ll --  lm ll ll ll lm", 
"   lm ll lm ll --  ll lm lm lm --  lm lm lm lm ll  lm lm ll lm ll  ll lm lm lm --", 
"   ll lm lm ll ll  lm ll ll ll ll  -- ll lm lm --  ll lm lm ll lm  -- ll ll lm lm", 
"   ll ll -- lm ll  ll lm ll lm ll  ll -- lm ll ll  lm ll lm lm ll  ll lm lm lm lm", 
"   ll lm -- lm --  ll lm ll lm --  -- lm ll ll ll  -- lm lm ll ll  ll lm ll -- lm", 
"   ll lm ll ll --  ll ll ll lm ll  ll ll ll ll ll  ll ll lm ll lm  ll lm ll ll"
)

用于匹配的文件如下:

name[1:5,]            # first five rows of `name`

#   Exclude Nr          Locus
# 1       0  1 448814085_2906
# 2       0  2 448814085_3447
# 3       0  3 448814085_3491
# 4       0  4 448814085_3510
# 5       0  5 448814085_3566 


## Look up the line numbers in `data` that match the entries of `name`.
## NOTE(review): `name` is printed above as a data frame, so sapply() iterates
## over its columns here; to match on the locus IDs, pass `name$Locus`.
matchline <- sapply(name, function(x) grep(x, data))

但是,除了匹配到的行本身之外,我还需要紧跟在每个匹配行之后的六行中的信息。

这是我尝试过的代码:

## Doesn't give the right answer: `:` has higher precedence than `+`, so
## `x:x+6` parses as `(x:x) + 6`, i.e. the single index x + 6. The intended
## range (the match line plus the six lines after it) is `x:(x + 6)`.
answer <- sapply(matchline, function(x) data[x:x+6])

我该怎样解决这个问题?假设匹配的是第1个和第2个位点,正确的结果应该是:

[11] "448814085_2906       <lmxll> ; 1"                                                 
[12] "   ll ll ll ll --  ll -- lm lm lm  -- lm ll lm lm  lm lm lm ll lm  -- ll ll ll lm"
[13] "   lm ll lm ll lm  ll lm lm lm lm  lm lm -- lm ll  lm lm ll lm ll  ll lm lm lm lm"
[14] "   ll lm lm ll ll  lm ll ll ll ll  lm ll lm lm lm  ll -- lm ll lm  -- ll ll lm --"
[15] "   ll ll -- lm ll  ll lm ll lm ll  ll lm -- ll ll  ll ll lm lm ll  ll lm lm -- lm"
[16] "   ll -- lm lm lm  ll lm ll lm lm  -- lm ll ll ll  -- lm lm ll ll  ll lm ll lm lm"
[17] "   ll lm ll ll lm  ll ll ll -- ll  ll ll ll ll ll  ll ll -- ll lm  ll lm ll ll"   
[18] "448814085_3447       <lmxll> ; 2"                                                 
[19] "   ll ll ll ll --  ll lm -- lm --  lm lm ll lm lm  lm lm lm ll --  lm ll ll ll --"
[20] "   lm ll lm ll lm  ll lm lm lm lm  lm lm lm lm ll  lm lm ll lm ll  ll lm lm lm --"
[21] "   ll lm lm ll ll  lm ll ll ll ll  -- ll -- -- lm  ll lm lm ll lm  -- ll ll lm lm"
[22] "   ll ll -- lm ll  ll lm ll lm ll  ll -- lm ll --  ll ll lm lm ll  ll lm -- lm lm"
[23] "   ll lm lm lm lm  ll lm ll lm lm  -- lm ll ll ll  -- lm lm ll ll  ll lm ll lm --"
[24] "   ll lm ll ll lm  ll ll -- lm ll  ll ll ll ll ll  ll ll lm ll lm  ll lm ll ll"   

1 个答案:

答案 0 :(得分:0)

我想这会起作用

 # Line numbers in `data` whose text contains each locus ID. lapply() always
 # returns a list (sapply()'s return type depends on how many hits each ID
 # gets), and fixed = TRUE matches the ID literally instead of as a regex.
 indx <- unlist(lapply(name$Locus, function(x) grep(x, data, fixed = TRUE)))
 # Every matched line is followed by six lines that belong to it, so take
 # offsets 0..6 from each hit; sort() restores the original line order.
 res <- data[sort(c(outer(indx, 0:6, "+")))]
 res
 # [1] "448814085_2906       <lmxll> ; 1"                                                 
 #[2] "   ll ll ll ll --  ll -- lm lm lm  -- lm ll lm lm  lm lm lm ll lm  -- ll ll ll lm"
 #[3] "   lm ll lm ll lm  ll lm lm lm lm  lm lm -- lm ll  lm lm ll lm ll  ll lm lm lm lm"
 #[4] "   ll lm lm ll ll  lm ll ll ll ll  lm ll lm lm lm  ll -- lm ll lm  -- ll ll lm --"
 #[5] "   ll ll -- lm ll  ll lm ll lm ll  ll lm -- ll ll  ll ll lm lm ll  ll lm lm -- lm"
 #[6] "   ll -- lm lm lm  ll lm ll lm lm  -- lm ll ll ll  -- lm lm ll ll  ll lm ll lm lm"
 #[7] "   ll lm ll ll lm  ll ll ll -- ll  ll ll ll ll ll  ll ll -- ll lm  ll lm ll ll"   
 #[8] "448814085_3447       <lmxll> ; 2"                                                 
 #[9] "   ll ll ll ll --  ll lm -- lm --  lm lm ll lm lm  lm lm lm ll --  lm ll ll ll --"
 #[10] "   lm ll lm ll lm  ll lm lm lm lm  lm lm lm lm ll  lm lm ll lm ll  ll lm lm lm --"
 #[11] "   ll lm lm ll ll  lm ll ll ll ll  -- ll -- -- lm  ll lm lm ll lm  -- ll ll lm lm"
 #[12] "   ll ll -- lm ll  ll lm ll lm ll  ll -- lm ll --  ll ll lm lm ll  ll lm -- lm lm"
 #[13] "   ll lm lm lm lm  ll lm ll lm lm  -- lm ll ll ll  -- lm lm ll ll  ll lm ll lm --"
 #[14] "   ll lm ll ll lm  ll ll -- lm ll  ll ll ll ll ll  ll ll lm ll lm  ll lm ll ll"   
 #[15] "448814085_3491       <lmxll> ; 3"                                                 
 #[16] "   ll ll ll ll --  ll lm lm lm lm  lm lm ll lm lm  lm lm lm ll --  lm ll ll ll lm"
 #[17] "   lm ll lm ll --  ll lm lm lm --  lm lm lm lm ll  lm lm ll lm ll  ll lm lm lm --"
 #[18] "   ll lm lm ll ll  lm ll ll ll ll  -- ll lm lm --  ll lm lm ll lm  -- ll ll lm lm"
 #[19] "   ll ll -- lm ll  ll lm ll lm ll  ll -- lm ll ll  lm ll lm lm ll  ll lm lm lm lm"
 #[20] "   ll lm -- lm --  ll lm ll lm --  -- lm ll ll ll  -- lm lm ll ll  ll lm ll -- lm"
 #[21] "   ll lm ll ll --  ll ll ll lm ll  ll ll ll ll ll  ll ll lm ll lm  ll lm ll ll"  

数据

# Example lookup table with columns Exclude, Nr and Locus (five loci).
name <- structure(
  list(
    Exclude = rep(0L, 5),
    Nr = seq_len(5),
    Locus = c(
      "448814085_2906",
      "448814085_3447",
      "448814085_3491",
      "448814085_3510",
      "448814085_3566"
    )
  ),
  names = c("Exclude", "Nr", "Locus"),
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5")
)