# Echo standard input line by line, printing a line only the first time its
# 10th whitespace-separated field is encountered.
seqs = Set{String}()  # 10th-field values that have already been printed
for line in eachline(STDIN)
    # Lines whose first character is '@' are always passed through.
    if line[1] == '@'
        println(line)
        continue
    end
    key = split(line)[10]
    # Skip lines whose key has been emitted before.
    key in seqs && continue
    println(line)
    push!(seqs, key)
end
上面的 Julia 代码比下面的 Python 版本慢得多(约 5 倍),为什么呢?注意:seq 仅包含 ASCII 字母。
import sys

# Echo standard input line by line, printing a line only the first time its
# 10th whitespace-separated field is encountered.
seqs = set()  # 10th-field values that have already been printed
for ln in sys.stdin:
    # Lines starting with '@' are always passed through.
    if ln.startswith('@'):
        # ln still ends with its newline, so write() emits it unchanged.
        sys.stdout.write(ln)
        continue
    seq = ln.split()[9]
    if seq not in seqs:
        sys.stdout.write(ln)
        seqs.add(seq)
答案 0 :(得分:1)
评论中的建议给出了:
"""
    foo()

Read lines from `STDIN` and print each one to standard output unless its 10th
whitespace-separated field has been printed before. Lines starting with `'@'`
are always printed.

Return the `Set{String}` of 10th-field values that were seen.
"""
function foo()
    seqs = Set{String}()
    for line in eachline(STDIN)
        if startswith(line, '@')
            println(line)
        else
            key = split(line)[10]
            # Only the first line carrying a given key is echoed.
            if key ∉ seqs
                println(line)
                push!(seqs, key)
            end
        end
    end
    return seqs
end
一个可能更快的变体,它依赖于 `Set` 内部只是 `Dict` 的事实:
"""
    foo()

Read lines from `STDIN` and print each one to standard output unless its 10th
whitespace-separated field has been printed before. Lines starting with `'@'`
are always printed.

Membership test and insertion are combined into a single `get!` call on the
set's `dict` field.

Return the `Set{String}` of 10th-field values that were seen.
"""
function foo()
    seqs = Set{String}()
    for line in eachline(STDIN)
        if startswith(line, '@')
            println(line)
        else
            key = split(line)[10]
            # Invoked by get! only when `key` is not yet stored; its return
            # value (`nothing`) becomes the value stored under `key`.
            first_seen = () -> begin
                println(line)
                return nothing
            end
            get!(first_seen, seqs.dict, key)
        end
    end
    return seqs
end