你会如何改进这个Julia代码?

时间:2018-01-03 20:00:09

标签: python performance julia

# Stream standard input line by line and echo it, dropping every line whose
# 10th whitespace-separated field has already been printed once.
# Lines whose first character is '@' are always echoed and never deduplicated.
# NOTE(review): `STDIN` is the pre-1.0 spelling of `stdin`, and `record[1]`
# raises on an empty line — both kept as in the original script.
seqs = Set{String}()
for record in eachline(STDIN)
    if record[1] != '@'
        key = split(record)[10]
        key in seqs && continue   # field already seen: drop this line
        println(record)
        push!(seqs, key)
    else
        println(record)
    end
end

它比下面的Python版慢得多(5倍),为什么呢?注意: seq 仅包含ASCII字母。

import sys

# Echo stdin to stdout, dropping every line whose 10th whitespace-separated
# field has already been seen. Lines starting with '@' are always echoed.
seqs = set()
for ln in sys.stdin:
    if ln.startswith('@'):
        # sys.stdout.write runs on both Python 2 and 3 (the `print ln,`
        # statement is Python 2 only); ln still carries its own newline.
        sys.stdout.write(ln)
        continue
    seq = ln.split()[9]
    if seq not in seqs:
        sys.stdout.write(ln)
        seqs.add(seq)

1 个答案:

答案 0 :(得分:1)

按照评论中的建议(把代码放进函数里,避免在全局作用域中运行;并用 startswith 代替 ln[1])修改后得到:

"""
    foo(input::IO=stdin, output::IO=stdout) -> Set{String}

Copy lines from `input` to `output`, dropping every line whose 10th
whitespace-separated field has already been seen on an earlier line.

Lines starting with `'@'` are always copied and take no part in the
duplicate check. Return the set of distinct 10th-field values encountered.

# Throws
- `BoundsError`: if a line that does not start with `'@'` has fewer than
  10 whitespace-separated fields.
"""
function foo(input::IO=stdin, output::IO=stdout)
    seqs = Set{String}()
    # `stdin` replaces `STDIN`, which was removed in Julia 1.0.
    for ln in eachline(input)
        if startswith(ln, '@')
            println(output, ln)
            continue
        end
        seq = split(ln)[10]
        if !(seq in seqs)
            println(output, ln)
            push!(seqs, seq)
        end
    end
    return seqs
end

一个可能更快的变体:它利用了 Set 内部由 Dict 实现这一点(注意这依赖于内部实现细节,而非公开接口):

"""
    foo(input::IO=stdin, output::IO=stdout) -> Set{String}

Copy lines from `input` to `output`, dropping every line whose 10th
whitespace-separated field has already been seen on an earlier line.

Lines starting with `'@'` are always copied and take no part in the
duplicate check. Return the set of distinct 10th-field values encountered.

# Throws
- `BoundsError`: if a line that does not start with `'@'` has fewer than
  10 whitespace-separated fields.
"""
function foo(input::IO=stdin, output::IO=stdout)
    seqs = Set{String}()
    # `stdin` replaces `STDIN`, which was removed in Julia 1.0.
    for ln in eachline(input)
        if startswith(ln, '@')
            println(output, ln)
            continue
        end
        seq = split(ln)[10]
        # Membership test and insertion in one `get!` call: the `do` block
        # runs only when `seq` is not yet a key. NOTE: `seqs.dict` is an
        # internal field of `Set`, not public API.
        get!(seqs.dict, seq) do
            println(output, ln)
            nothing
        end
    end
    return seqs
end