使用 readLines 读入文件后遇到了问题。我希望根据给定的字符串对数据进行匹配,并提取(子集化)出匹配到的内容。
dput(data[1:31]) ## line 1 to 31
c("; Tue, 28 Oct 2014, 10:52", "", "; original file: H:\\Rubber_join\\part_1\\part_1.txt",
"; no sorting", "", "name = Pop_Rubber", "popt = CP", "nloc = 5000",
"nind = 149", "", "448814085_2906 <lmxll> ; 1", " ll ll ll ll -- ll -- lm lm lm -- lm ll lm lm lm lm lm ll lm -- ll ll ll lm",
" lm ll lm ll lm ll lm lm lm lm lm lm -- lm ll lm lm ll lm ll ll lm lm lm lm",
" ll lm lm ll ll lm ll ll ll ll lm ll lm lm lm ll -- lm ll lm -- ll ll lm --",
" ll ll -- lm ll ll lm ll lm ll ll lm -- ll ll ll ll lm lm ll ll lm lm -- lm",
" ll -- lm lm lm ll lm ll lm lm -- lm ll ll ll -- lm lm ll ll ll lm ll lm lm",
" ll lm ll ll lm ll ll ll -- ll ll ll ll ll ll ll ll -- ll lm ll lm ll ll",
"448814085_3447 <lmxll> ; 2", " ll ll ll ll -- ll lm -- lm -- lm lm ll lm lm lm lm lm ll -- lm ll ll ll --",
" lm ll lm ll lm ll lm lm lm lm lm lm lm lm ll lm lm ll lm ll ll lm lm lm --",
" ll lm lm ll ll lm ll ll ll ll -- ll -- -- lm ll lm lm ll lm -- ll ll lm lm",
" ll ll -- lm ll ll lm ll lm ll ll -- lm ll -- ll ll lm lm ll ll lm -- lm lm",
" ll lm lm lm lm ll lm ll lm lm -- lm ll ll ll -- lm lm ll ll ll lm ll lm --",
" ll lm ll ll lm ll ll -- lm ll ll ll ll ll ll ll ll lm ll lm ll lm ll ll",
"448814085_3491 <lmxll> ; 3", " ll ll ll ll -- ll lm lm lm lm lm lm ll lm lm lm lm lm ll -- lm ll ll ll lm",
" lm ll lm ll -- ll lm lm lm -- lm lm lm lm ll lm lm ll lm ll ll lm lm lm --",
" ll lm lm ll ll lm ll ll ll ll -- ll lm lm -- ll lm lm ll lm -- ll ll lm lm",
" ll ll -- lm ll ll lm ll lm ll ll -- lm ll ll lm ll lm lm ll ll lm lm lm lm",
" ll lm -- lm -- ll lm ll lm -- -- lm ll ll ll -- lm lm ll ll ll lm ll -- lm",
" ll lm ll ll -- ll ll ll lm ll ll ll ll ll ll ll ll lm ll lm ll lm ll ll"
)
并且匹配文件是:
name[1:5,]
# Exclude Nr Locus
# 1 0 1 448814085_2906
# 2 0 2 448814085_3447
# 3 0 3 448814085_3491
# 4 0 4 448814085_3510
# 5 0 5 448814085_3566
# For each element that sapply() iterates over in `name`, collect the indices
# of the lines in `data` that match it.
# NOTE(review): `name` is printed above as a data frame, so sapply(name, ...)
# iterates over its columns (Exclude, Nr, Locus) rather than over the
# individual locus IDs; sapply(name$Locus, ...) is presumably what was
# intended -- confirm.
matchline <- sapply(name, function(x) grep(x, data)) ## so i get the matchline
但是,除了 matchline 给出的匹配行之外,我还需要其他信息,它们位于每个匹配行之后的六行中。
这是我尝试过的代码:
# Attempt to extract each matched line together with the six lines after it.
# NOTE(review): `:` has higher precedence than `+`, so `x:x+6` parses as
# `(x:x) + 6`, i.e. the single index `x + 6`; `x:(x + 6)` would span the
# matched line and the six lines that follow it.
answer <- sapply(matchline, function(x) data[x:x+6]) ## but it doesn't give the right answer.
我该怎样做才能解决这个问题?假设匹配到第 1 个和第 2 个位点,正确的结果应该是:
[11] "448814085_2906 <lmxll> ; 1"
[12] " ll ll ll ll -- ll -- lm lm lm -- lm ll lm lm lm lm lm ll lm -- ll ll ll lm"
[13] " lm ll lm ll lm ll lm lm lm lm lm lm -- lm ll lm lm ll lm ll ll lm lm lm lm"
[14] " ll lm lm ll ll lm ll ll ll ll lm ll lm lm lm ll -- lm ll lm -- ll ll lm --"
[15] " ll ll -- lm ll ll lm ll lm ll ll lm -- ll ll ll ll lm lm ll ll lm lm -- lm"
[16] " ll -- lm lm lm ll lm ll lm lm -- lm ll ll ll -- lm lm ll ll ll lm ll lm lm"
[17] " ll lm ll ll lm ll ll ll -- ll ll ll ll ll ll ll ll -- ll lm ll lm ll ll"
[18] "448814085_3447 <lmxll> ; 2"
[19] " ll ll ll ll -- ll lm -- lm -- lm lm ll lm lm lm lm lm ll -- lm ll ll ll --"
[20] " lm ll lm ll lm ll lm lm lm lm lm lm lm lm ll lm lm ll lm ll ll lm lm lm --"
[21] " ll lm lm ll ll lm ll ll ll ll -- ll -- -- lm ll lm lm ll lm -- ll ll lm lm"
[22] " ll ll -- lm ll ll lm ll lm ll ll -- lm ll -- ll ll lm lm ll ll lm -- lm lm"
[23] " ll lm lm lm lm ll lm ll lm lm -- lm ll ll ll -- lm lm ll ll ll lm ll lm --"
[24] " ll lm ll ll lm ll ll -- lm ll ll ll ll ll ll ll ll lm ll lm ll lm ll ll"
答案 0 :(得分:0)
我想这会起作用
# Indices of the header lines in `data` that contain one of the locus IDs in
# `name$Locus`. `fixed = TRUE` matches each ID as a literal string instead of
# a regular expression; IDs with no match contribute nothing to `indx`.
# lapply() + unlist() always yields a flat vector, whereas sapply() may return
# a list, a vector or a matrix depending on how many matches each ID has.
indx <- unlist(lapply(name$Locus, function(x) grep(x, data, fixed = TRUE)))
# Expand every header index to itself plus the six lines that follow it, and
# sort so the selected lines come out in file order.
rows <- sort(rep(indx, each = 7L) + 0:6)
# Drop indices past the end of `data` (a header within the last six lines),
# which would otherwise show up as NA elements in the result.
res <- data[rows[rows <= length(data)]]
res
# [1] "448814085_2906 <lmxll> ; 1"
#[2] " ll ll ll ll -- ll -- lm lm lm -- lm ll lm lm lm lm lm ll lm -- ll ll ll lm"
#[3] " lm ll lm ll lm ll lm lm lm lm lm lm -- lm ll lm lm ll lm ll ll lm lm lm lm"
#[4] " ll lm lm ll ll lm ll ll ll ll lm ll lm lm lm ll -- lm ll lm -- ll ll lm --"
#[5] " ll ll -- lm ll ll lm ll lm ll ll lm -- ll ll ll ll lm lm ll ll lm lm -- lm"
#[6] " ll -- lm lm lm ll lm ll lm lm -- lm ll ll ll -- lm lm ll ll ll lm ll lm lm"
#[7] " ll lm ll ll lm ll ll ll -- ll ll ll ll ll ll ll ll -- ll lm ll lm ll ll"
#[8] "448814085_3447 <lmxll> ; 2"
#[9] " ll ll ll ll -- ll lm -- lm -- lm lm ll lm lm lm lm lm ll -- lm ll ll ll --"
#[10] " lm ll lm ll lm ll lm lm lm lm lm lm lm lm ll lm lm ll lm ll ll lm lm lm --"
#[11] " ll lm lm ll ll lm ll ll ll ll -- ll -- -- lm ll lm lm ll lm -- ll ll lm lm"
#[12] " ll ll -- lm ll ll lm ll lm ll ll -- lm ll -- ll ll lm lm ll ll lm -- lm lm"
#[13] " ll lm lm lm lm ll lm ll lm lm -- lm ll ll ll -- lm lm ll ll ll lm ll lm --"
#[14] " ll lm ll ll lm ll ll -- lm ll ll ll ll ll ll ll ll lm ll lm ll lm ll ll"
#[15] "448814085_3491 <lmxll> ; 3"
#[16] " ll ll ll ll -- ll lm lm lm lm lm lm ll lm lm lm lm lm ll -- lm ll ll ll lm"
#[17] " lm ll lm ll -- ll lm lm lm -- lm lm lm lm ll lm lm ll lm ll ll lm lm lm --"
#[18] " ll lm lm ll ll lm ll ll ll ll -- ll lm lm -- ll lm lm ll lm -- ll ll lm lm"
#[19] " ll ll -- lm ll ll lm ll lm ll ll -- lm ll ll lm ll lm lm ll ll lm lm lm lm"
#[20] " ll lm -- lm -- ll lm ll lm -- -- lm ll ll ll -- lm lm ll ll ll lm ll -- lm"
#[21] " ll lm ll ll -- ll ll ll lm ll ll ll ll ll ll ll ll lm ll lm ll lm ll ll"
# Lookup table of loci to extract: an exclusion flag, a running number and the
# locus ID for each of the five loci. Built as a plain list first and then
# promoted to a data frame by attaching the row names and the class.
name <- list(
  Exclude = rep(0L, 5),
  Nr = 1:5,
  Locus = c(
    "448814085_2906", "448814085_3447", "448814085_3491",
    "448814085_3510", "448814085_3566"
  )
)
attr(name, "row.names") <- c("1", "2", "3", "4", "5")
class(name) <- "data.frame"