在Nim中将迭代器作为参数传递失败“尝试调用未声明的例程”

时间:2018-01-15 09:52:59

标签: iterator nim

我正在尝试通过编写我已经用其他语言实现的某种生物信息学工具来学习Nim。

我有以下版本可以正确编译和运行:

from strutils import join
from sequtils import zip

type
  # Distinct string wrappers: a nucleotide string cannot be used where a
  # quality string is expected (or the reverse) without an explicit
  # conversion. A plain `= string` alias would drop that separation.
  Nucleotides = distinct string
  Qualities = distinct string
  # One record: its name line, its nucleotide string and its quality string.
  Fastq = tuple[name: string, nucls: Nucleotides, quals: Qualities]

# Equality for Nucleotides is borrowed from the underlying string type; a
# distinct type does not get `==` automatically. Hand-written equivalent:
# proc `==` (ns, ms: Nucleotides): bool =
#   string(ns) == string(ms)
# See https://nim-by-example.github.io/types/distinct/
proc `==` (ns, ms: Nucleotides): bool {.borrow.}

proc makeFastq(name, nucls, quals: string): Fastq =
  ## Builds a Fastq record from three plain strings, wrapping the nucleotide
  ## and quality strings in their distinct types.
  (name: name, nucls: Nucleotides(nucls), quals: Qualities(quals))

proc bestQuals(quals1, quals2: string): string =
  ## Combines two quality strings position by position, keeping at each
  ## position the character with the higher ordinal value.
  ##
  ## The result is as long as the shorter of the two inputs; any extra
  ## characters of the longer input are ignored. Two empty inputs (or one
  ## empty input) give an empty string.
  let n = min(quals1.len, quals2.len)
  result = newStringOfCap(n)
  # Index both strings directly instead of going through `sequtils.zip`:
  # since Nim 1.2 `zip` yields anonymous tuples, so `pair.a` / `pair.b` no
  # longer compile, and `zip` also allocates a temporary seq of pairs.
  for i in 0 ..< n:
    result.add(chr(max(ord(quals1[i]), ord(quals2[i]))))

proc bestQuals(quals1, quals2: Qualities): Qualities =
  ## Distinct-type overload: unwraps both arguments, delegates to the string
  ## overload of `bestQuals`, and wraps the result again.
  Qualities(bestQuals(quals1.string, quals2.string))

proc fuseFastq(rec1, rec2: Fastq): Fastq =
  ## Merges two records: name and nucleotides come from `rec1`; the
  ## qualities are `bestQuals` of both records' quality strings.
  result = rec1
  result.quals = bestQuals(rec1.quals, rec2.quals)

proc `$` (record: Fastq): string =
  ## Renders a record as four newline-separated lines: name, nucleotides,
  ## a literal "+" and qualities. No trailing newline is added.
  const sep = "\n"
  result = record.name & sep &
    string(record.nucls) & sep &
    "+" & sep &
    string(record.quals)

iterator parseFastqs(input: File): Fastq =
  ## Yields one Fastq record per group of four lines read from `input`,
  ## until `input` reports end of file.
  while not input.endOfFile:
    let header = input.readLine()
    let bases = input.readLine()
    # The third line of each group is read and thrown away.
    discard input.readLine()
    let scores = input.readLine()
    yield makeFastq(header, bases, scores)

proc deduplicate() =
  ## Reads Fastq records from stdin and echoes one record for each run of
  ## consecutive records whose nucleotides are equal. Within a run the first
  ## record's name is kept and the qualities are merged with `fuseFastq`.
  # An empty name marks "nothing buffered yet".
  var pending: Fastq = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
  for fastq in parseFastqs(stdin):
    if pending.nucls == fastq.nucls:
      # Same nucleotides as the buffered record: fold this one into it.
      pending = fuseFastq(pending, fastq)
    else:
      # A new run starts: flush the buffered record, if there is one.
      if pending.name != "":
        echo $pending
      pending = fastq
  # Flush the last buffered record.
  if pending.name != "":
    echo $pending

# Run the deduplication only when this file is compiled as the main module.
when isMainModule:
  deduplicate()

现在,我希望deduplicate接受一个能够产生Fastq元组的“东西”(目前是迭代器)作为参数。让when isMainModule部分来决定是从stdin读取,还是将来从其他来源(例如作为命令行参数传入的文件)读取,似乎更清晰:

# Desired shape: the source of Fastq records becomes a parameter.
# `<some relevant type>` is a placeholder for the type being asked about;
# it is not valid Nim.
proc deduplicate(inputFqs: <some relevant type>) =
  var
    record: Fastq
  # An empty name marks "nothing buffered yet".
  record = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
  for fastq in inputFqs:
    if record.nucls != fastq.nucls:
      # Nucleotides changed: flush the buffered record, then buffer this one.
      if record.name != "":
        echo $record
      record = fastq
    else:
      # Same nucleotides: merge into the buffered record.
      record = fuseFastq(record, fastq)
      continue
  # Flush the last buffered record.
  if record.name != "":
    echo $record

when isMainModule:
  # The caller picks the record source (stdin here) and hands it over.
  let inputFqs = parseFastqs(stdin)
  deduplicate(inputFqs)

有一种简单有效的方法吗?

我天真地尝试了以下方法:

# First attempt: the parameter is typed as a bare `iterator` and is iterated
# without being called.
proc deduplicate(inputFqs: iterator) =
  var
    record: Fastq
  # An empty name marks "nothing buffered yet".
  record = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
  for fastq in inputFqs:
    if record.nucls != fastq.nucls:
      # Nucleotides changed: flush the buffered record, then buffer this one.
      if record.name != "":
        echo $record
      record = fastq
    else:
      # Same nucleotides: merge into the buffered record.
      record = fuseFastq(record, fastq)
      continue
  # Flush the last buffered record.
  if record.name != "":
    echo $record

when isMainModule:
  # The question reports that this attempt fails to compile with
  # "attempting to call undeclared routine: 'parseFastqs'".
  let inputFqs = parseFastqs(stdin)
  deduplicate(inputFqs)

这会导致以下编译错误:Error: attempting to call undeclared routine: 'parseFastqs'

我查阅了手册(manual),了解到应该把我的迭代器改成闭包迭代器。于是我先只加上了{.closure.}编译指示:

iterator parseFastqs(input: File): Fastq {.closure.} =

但我一直有同样的错误。

所以我试着更仔细地模仿手册中给出的例子:

iterator parseFastqs(input: File): Fastq {.closure.} =
  ## Closure-iterator variant: yields one Fastq record per group of four
  ## lines read from `input`, until `input` reports end of file.
  while not input.endOfFile:
    let header = input.readLine()
    let bases = input.readLine()
    # The third line of each group is read and thrown away.
    discard input.readLine()
    let scores = input.readLine()
    yield makeFastq(header, bases, scores)

# Second attempt: the parameter is an argument-less closure iterator that
# yields Fastq records.
proc deduplicate(inputFqs: iterator(): Fastq {.closure.}) =
  var
    record: Fastq
  # An empty name marks "nothing buffered yet".
  record = (name: "", nucls: "".Nucleotides, quals: "".Qualities)
  # NOTE: iterating `inputFqs` without calling it makes the compiler look for
  # `items(inputFqs)`, which is the type mismatch reported in the question;
  # the question's edit resolves that by writing `inputFqs()`.
  for fastq in inputFqs:
    if record.nucls != fastq.nucls:
      # Nucleotides changed: flush the buffered record, then buffer this one.
      if record.name != "":
        echo $record
      record = fastq
    else:
      # Same nucleotides: merge into the buffered record.
      record = fuseFastq(record, fastq)
      continue
  # Flush the last buffered record.
  if record.name != "":
    echo $record

# Per the question, once the loop is changed to `inputFqs()` this call is
# rejected with "attempting to call undeclared routine: 'parseFastqs'".
# NOTE(review): the argument here is an iterator *call* taking a File, while
# the parameter type is an argument-less iterator value - confirm against
# the Nim manual's section on first-class iterators.
deduplicate(parseFastqs(stdin))

这导致了类型错误:

Error: type mismatch: got (iterator (): Fastq{.closure.})
but expected one of: 
iterator items[T](a: set[T]): T
iterator items(a: cstring): char
iterator items[T](a: openArray[T]): T
iterator items[IX, T](a: array[IX, T]): T
iterator items(a: string): char
iterator items[T](a: seq[T]): T
iterator items(E: typedesc[enum]): E:type
iterator items[T](s: HSlice[T, T]): T

expression: items(inputFqs)

我做错了什么?

编辑:解决类型问题

似乎可以通过将for fastq in inputFqs:更改为for fastq in inputFqs():来解决类型不匹配问题。情况又回到Error: attempting to call undeclared routine: 'parseFastqs'

对手册中的示例做了一些试验后发现,迭代器参数的类型不需要括号。以下代码可以编译并正确运行:

iterator count0(): int {.closure.} =
  ## Closure iterator that yields the single value 0.
  yield 0

iterator count2(): int {.closure.} =
  ## Closure iterator that yields 1 and then 2.
  for value in 1 .. 2:
    yield value

proc invoke(iter: iterator) =
  ## Runs the given iterator to completion and echoes every value it yields.
  ## Note that the iterator is *called* (`iter()`) in the `for` header.
  for value in iter():
    echo value

# The iterators are passed by name, without being called.
invoke(count0)  # echoes 0
invoke(count2)  # echoes 1, then 2

现在我想知道原始示例中括号的含义:

proc invoke(iter: iterator(): int {.closure.}) =

1 个答案:

答案 0 :(得分:0)

您必须用for循环遍历迭代器(调用时要带括号):

# `myIterator` stands for any iterator; note the call parentheses in the
# `for` header.
for item in myIterator():
  echo repr item

或者您可以将其转换为序列

import sequtils
# `toSeq` collects the values produced by the iterator call into a seq.
echo toSeq(myIterator())