从多个源文件流式传输文本行

时间:2017-12-13 05:49:36

标签: akka-stream

我正在使用 akka-stream 实现一个从多个文件中读取文本行的方案,目前的实现如下:

      /**
       * Entry point: builds a line-processing flow and runs it once per input file.
       *
       * @param args command-line arguments (not used here)
       */
      def main(args: Array[String]): Unit = {
        // Linear String -> Unit pipeline: doQuery feeds analyzeResult.
        val g: Flow[String, Unit, NotUsed] = Flow.fromGraph(GraphDSL.create() {
          implicit builder =>
            import GraphDSL.Implicits._

            val A = builder.add(doQuery)
            val B = builder.add(analyzeResult)
            A ~> B
            FlowShape(A.in, B.out)
        })

        val files = Source(fileNames)
        // One inner Source of text lines per file. `scala.io.Source` is fully
        // qualified because the bare name `Source` is the Akka Streams one here,
        // and the File itself is passed so its directory part is not lost
        // (file.getName would keep only the last path segment).
        val lines = files.map(file =>
          Source.fromIterator(() => scala.io.Source.fromFile(file, "UTF-8").getLines())
        )

        // Each inner Source is started as its own stream through `g`. runForeach
        // discards the value returned by g.runWith, so `done` tracks only the
        // outer stream of files, not the completion of the per-file streams.
        val done = lines.runForeach(g.runWith(_, Sink.ignore))
        //    implicit val ec = system.dispatcher
        //    done.onComplete(_ => system.terminate())
      }

      // Input files whose lines are streamed; left unimplemented (`???`) in this snippet.
      val fileNames: List[File] = ???

      /**
       * Groups incoming lines into batches (at most 1000 lines or 100 ms, whichever
       * comes first) and hands each batch to an asynchronous step, with up to 4
       * batches in flight. The step body is a placeholder that returns `Nil`.
       */
      val doQuery: Flow[String, Seq[String], NotUsed] = Flow[String]
        .groupedWithin(1000, 100.milliseconds)
        .mapAsync(4) { _ =>
          Future[Seq[String]] {
            // `synchronized` locks the enclosing instance, so only one of the
            // in-flight futures executes this body at any given time.
            synchronized {
              // Do Something
              Nil
            }
          }
        }

      // Consumes the batches emitted by doQuery; left unimplemented (`???`) in this snippet.
      val analyzeResult: Flow[Seq[String], Unit, NotUsed] = ???

有没有更好的解决方案?欢迎大家提供意见或反馈。我不希望使用两个 Source(一个是文件列表,另一个是每个文件的文本行),想知道如何只用一个 Source(文件列表)来实现……先谢谢了(TIA)!

1 个答案:

答案 0 :(得分:0)

/**
 * Flow that accepts files and runs their text lines through doQuery and then
 * analyzeResult, so the list of files can be the only Source of the stream.
 */
val g: Flow[File, Unit, NotUsed] = Flow.fromGraph(
  GraphDSL.create() {
    implicit builder =>
      import GraphDSL.Implicits._

      // Expand every File into its lines, one file after another. The reader
      // is managed by unfoldResource so that it is closed once the file's
      // lines have been consumed or the stream stops early.
      val A = builder.add(Flow[File]
        .flatMapConcat { file =>
          Source.unfoldResource[String, (scala.io.BufferedSource, Iterator[String])](
            () => {
              val in = scala.io.Source.fromFile(file, "UTF-8")
              (in, in.getLines())
            },
            { case (_, it) => if (it.hasNext) Some(it.next()) else None },
            { case (in, _) => in.close() }
          )
        }
      )
      val B = builder.add(doQuery)
      val C = builder.add(analyzeResult)

      A ~> B ~> C
      FlowShape(A.in, C.out)
  }
)

// The list of files is the single Source; `g` expands each file into lines.
val files = Source(fileNames)

// Running the flow yields a pair of materialized values; only the sink's
// completion future is needed.
val (_, done) = g.runWith(files, Sink.ignore)

// Shut the actor system down once the stream has finished (or failed).
implicit val ec = system.dispatcher
done.onComplete { _ => system.terminate() }