我正在通过用 Nim 重写一个我已经用其他语言实现过的生物信息学工具来学习 Nim。
下面这个版本可以正确编译和运行:
from strutils import join
from sequtils import zip
type
  # Sequence text of a record. `distinct` makes it incompatible with plain
  # strings and with `Qualities`.
  Nucleotides = distinct string
  # Quality text of a record, likewise a distinct string type.
  Qualities = distinct string
  # Plain-alias alternative, left disabled:
  #Nucleotides = string
  #Qualities = string
  # One record: its name plus the sequence and quality strings.
  Fastq = tuple[name: string, nucls: Nucleotides, quals: Qualities]
# Equality for the distinct `Nucleotides` type, defined by comparing the
# underlying strings. An equivalent spelling borrows the base type's operator:
#   proc `==` (ns, ms: Nucleotides): bool {.borrow.}
# Background: https://nim-by-example.github.io/types/distinct/
proc `==` (ns, ms: Nucleotides): bool =
  string(ns) == string(ms)
proc makeFastq(name, nucls, quals: string): Fastq =
  ## Builds a `Fastq` record from three plain strings, converting the
  ## sequence and quality strings to their distinct types.
  (name: name, nucls: Nucleotides(nucls), quals: Qualities(quals))
proc bestQuals(quals1, quals2: string): string =
  ## Returns, position by position, the greater of the two quality characters.
  ##
  ## The result is as long as the shorter input; surplus characters of the
  ## longer input are ignored.
  let n = min(quals1.len, quals2.len)
  result = newStringOfCap(n)
  # Index directly instead of going through `zip`: no intermediate seq is
  # allocated, and the code no longer relies on the `a`/`b` tuple field names
  # that `sequtils.zip` only provided in older Nim releases.
  for i in 0 ..< n:
    result.add(max(quals1[i], quals2[i]))
proc bestQuals(quals1, quals2: Qualities): Qualities =
  ## `Qualities` overload: unwraps both arguments, delegates to the `string`
  ## version of `bestQuals`, and wraps the merged string again.
  let merged = bestQuals(quals1.string, quals2.string)
  Qualities(merged)
proc fuseFastq(rec1, rec2: Fastq): Fastq =
  ## Merges two records: name and sequence come from `rec1`; the qualities
  ## are `bestQuals` of both records' qualities.
  result = rec1
  result.quals = bestQuals(rec1.quals, rec2.quals)
proc `$` (record: Fastq): string =
  ## Formats a record as four newline-separated lines: name, sequence, a
  ## lone "+", and qualities. No trailing newline is appended.
  const sep = "\n"
  result = record.name & sep & string(record.nucls)
  result.add sep & "+" & sep & string(record.quals)
iterator parseFastqs(input: File): Fastq =
  ## Yields one `Fastq` per group of four lines read from `input`.
  ##
  ## Line 1 becomes the name, line 2 the sequence, line 3 is read and
  ## dropped, and line 4 becomes the qualities. Iteration stops once
  ## `endOfFile(input)` is true at the start of a record.
  # NOTE(review): if the input ends in the middle of a record, `readLine` is
  # expected to raise - confirm that this is the desired behaviour.
  while not endOfFile(input):
    let
      header = readLine(input)
      bases = readLine(input)
    discard readLine(input)  # third line of the record is not used
    let scores = readLine(input)
    yield makeFastq(header, bases, scores)
proc deduplicate() =
  ## Reads records from stdin via `parseFastqs` and echoes them, collapsing
  ## each run of consecutive records that share the same sequence into a
  ## single record (merged with `fuseFastq`).
  var
    record: Fastq
    # True once `record` holds a real record. The explicit flag replaces the
    # former empty-name/empty-sequence placeholder, which made a leading
    # record with an empty sequence merge into the placeholder and never
    # get printed.
    pending = false
  for fastq in parseFastqs(stdin):
    if pending and record.nucls == fastq.nucls:
      # Same sequence as the current run: keep the best qualities.
      record = fuseFastq(record, fastq)
    else:
      # A new run starts: emit the finished one (if any) and start over.
      if pending:
        echo $record
      record = fastq
      pending = true
  # Flush the last run, if any input was read at all.
  if pending:
    echo $record

when isMainModule:
  deduplicate()
现在,我希望 `deduplicate` 接受一个能生成 `Fastq` 元组的“东西”(目前是一个迭代器)作为参数。由 `when isMainModule` 部分来决定是从 stdin 读取,还是将来从别的来源(例如作为命令行参数传入的文件)读取,这样似乎更清晰:
# Sketch of the desired design (not valid Nim as written): `deduplicate`
# receives the source of `Fastq` records as its `inputFqs` parameter, and the
# `when isMainModule` section chooses that source (here `parseFastqs(stdin)`).
# `<some relevant type>` is a placeholder for the parameter type in question.
proc deduplicate(inputFqs: <some relevant type>) =
var
record: Fastq
# Start from a placeholder record with empty name, sequence and qualities.
record = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
for fastq in inputFqs:
if record.nucls != fastq.nucls:
# The placeholder (empty name) is never echoed.
if record.name != "":
echo $record
record = fastq
else:
record = fuseFastq(record, fastq)
continue
# Echo whatever record is still held after the loop.
if record.name != "":
echo $record
when isMainModule:
let inputFqs = parseFastqs(stdin)
deduplicate(inputFqs)
有一种简单有效的方法吗?
我天真地尝试了以下方法:
# First attempt: the parameter is typed with a bare `iterator` (no signature).
# The author reports that this version fails to compile with
# "attempting to call undeclared routine: 'parseFastqs'".
proc deduplicate(inputFqs: iterator) =
var
record: Fastq
# Start from a placeholder record with empty name, sequence and qualities.
record = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
for fastq in inputFqs:
if record.nucls != fastq.nucls:
# The placeholder (empty name) is never echoed.
if record.name != "":
echo $record
record = fastq
else:
record = fuseFastq(record, fastq)
continue
# Echo whatever record is still held after the loop.
if record.name != "":
echo $record
when isMainModule:
# The result of calling `parseFastqs(stdin)` is bound to a name and passed on.
let inputFqs = parseFastqs(stdin)
deduplicate(inputFqs)
这会导致以下编译错误:`Error: attempting to call undeclared routine: 'parseFastqs'`。
我查阅了手册(manual),了解到应该把我的迭代器改成闭包迭代器。于是我先只加上了 `{.closure.}` 编译指示:
iterator parseFastqs(input: File): Fastq {.closure.} =
但我仍然得到同样的错误。
于是我试着更严格地模仿手册中给出的例子:
iterator parseFastqs(input: File): Fastq {.closure.} =
  ## Closure-iterator variant: yields one `Fastq` per group of four lines
  ## read from `input`.
  ##
  ## Line 1 becomes the name, line 2 the sequence, line 3 is read and
  ## dropped, and line 4 becomes the qualities. Iteration stops once
  ## `endOfFile(input)` is true at the start of a record.
  while not endOfFile(input):
    let
      header = readLine(input)
      bases = readLine(input)
    discard readLine(input)  # third line of the record is not used
    let scores = readLine(input)
    yield makeFastq(header, bases, scores)
# Second attempt, modelled on the manual's example: the parameter type spells
# out a closure iterator that takes no arguments and yields `Fastq`.
# The author reports a type mismatch for `items(inputFqs)`; the `for` loop
# below names `inputFqs` without calling it.
proc deduplicate(inputFqs: iterator(): Fastq {.closure.}) =
var
record: Fastq
# Start from a placeholder record with empty name, sequence and qualities.
record = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
for fastq in inputFqs:
if record.nucls != fastq.nucls:
# The placeholder (empty name) is never echoed.
if record.name != "":
echo $record
record = fastq
else:
record = fuseFastq(record, fastq)
continue
# Echo whatever record is still held after the loop.
if record.name != "":
echo $record
# NOTE(review): the declared parameter is a no-argument iterator, while
# `parseFastqs` takes a `File` and is called here rather than passed by name -
# confirm whether this mismatch is what the "undeclared routine" error reflects.
deduplicate(parseFastqs(stdin))
这导致了类型错误:
Error: type mismatch: got (iterator (): Fastq{.closure.})
but expected one of:
iterator items[T](a: set[T]): T
iterator items(a: cstring): char
iterator items[T](a: openArray[T]): T
iterator items[IX, T](a: array[IX, T]): T
iterator items(a: string): char
iterator items[T](a: seq[T]): T
iterator items(E: typedesc[enum]): E:type
iterator items[T](s: HSlice[T, T]): T
expression: items(inputFqs)
我做错了什么?
把 `for fastq in inputFqs:` 改为 `for fastq in inputFqs():` 似乎可以解决类型不匹配的问题,但随后又回到了 `Error: attempting to call undeclared routine: 'parseFastqs'`。
对手册中的示例做了一些修改尝试后,我发现迭代器参数的类型不需要带括号。以下代码可以编译并正确运行:
iterator count0(): int {.closure.} =
  ## Closure iterator that yields the single value 0.
  const only = 0
  yield only
iterator count2(): int {.closure.} =
  ## Closure iterator that yields 1 and then 2.
  for x in 1 .. 2:
    yield x
proc invoke(iter: iterator) =
  ## Runs the given iterator to completion, echoing each value it yields.
  for value in iter():
    echo value

# Both iterators are passed by name, without being called at the call site.
invoke(count0)
invoke(count2)
现在我想知道原始示例中括号的含义:
proc invoke(iter: iterator(): int {.closure.}) =
答案 0 :(得分:0)
您必须通过循环来遍历该迭代器:
# Call the iterator in the `for` header and print the `repr` of each item.
for item in myIterator(): echo repr(item)
或者您可以将其转换为序列
import sequtils
# Collect everything the iterator yields into a seq, then print that seq.
let collected = toSeq(myIterator())
echo collected