我正在使用 akka-stream 实现一个从多个文件中读取文本行的方案,目前的实现如下:
/** Entry point: streams every line of every file in `fileNames` through
  * `doQuery` and then `analyzeResult`, discarding the results.
  *
  * All files are processed by a single materialized stream, so `done`
  * completes only after the last line of the last file has been handled.
  *
  * @param args command-line arguments (unused)
  */
def main(args: Array[String]): Unit = {
  // Linear pipeline doQuery ~> analyzeResult, exposed as one Flow.
  val g: Flow[String, Unit, NotUsed] = Flow.fromGraph(GraphDSL.create() {
    implicit builder =>
      import GraphDSL.Implicits._
      val A = builder.add(doQuery)
      val B = builder.add(analyzeResult)
      A ~> B
      FlowShape(A.in, B.out)
  })
  val files = Source(fileNames)
  // flatMapConcat reads the files one after another and flattens their lines
  // into a single stream. `scala.io.Source` is fully qualified because the
  // unqualified `Source` here is the Akka Streams one, which has no `fromFile`.
  // The File itself is passed so its directory part is kept.
  // NOTE(review): the scala.io.Source handle is never closed explicitly.
  val lines = files.flatMapConcat(file =>
    Source.fromIterator(() => scala.io.Source.fromFile(file, "UTF-8").getLines())
  )
  // One materialization for the whole job instead of one detached stream per file.
  val done = lines.via(g).runWith(Sink.ignore)
  // implicit val ec = system.dispatcher
  // done.onComplete(_ => system.terminate())
}
// Files whose lines are fed into the stream; left unimplemented (`???`) in this snippet.
val fileNames: List[File] = ???
/** Groups incoming lines into batches and runs an asynchronous query per batch.
  *
  * A batch is emitted once 1000 lines have been collected or 100 ms have
  * elapsed, whichever happens first. Up to 4 batches may be in flight at once.
  * The query body is a placeholder that currently returns `Nil`.
  */
val doQuery: Flow[String, Seq[String], NotUsed] = Flow[String]
  // Method-call syntax avoids the postfix-operator form (`100 millisecond`),
  // which requires `scala.language.postfixOps`.
  .groupedWithin(1000, 100.milliseconds)
  .mapAsync(4) { batch =>
    Future[Seq[String]] {
      // NOTE(review): `synchronized` locks the enclosing instance, so the
      // future bodies run one at a time despite the parallelism of 4.
      // Confirm the lock is actually needed.
      synchronized {
        // Do Something
        Nil
      }
    }
  }
// Stage that consumes each Seq[String] produced by `doQuery` and emits Unit;
// left unimplemented (`???`) in this snippet.
val analyzeResult: Flow[Seq[String], Unit, NotUsed] = ???
是否有更好的解决方案?欢迎大家提供意见或反馈。我不希望使用两个 Source(一个是文件列表,另一个是每个文件中的文本行),想知道如何只使用单一的 Source(即文件列表)。先谢谢了(TIA)!
答案 0 :(得分:0)
/** Flow that accepts files, expands each one into its text lines, and passes
  * those lines through `doQuery` and then `analyzeResult`.
  *
  * Because the file-to-lines expansion lives inside the flow, callers need
  * only a single `Source` of files.
  */
val g: Flow[File, Unit, NotUsed] = Flow.fromGraph(
  GraphDSL.create() {
    implicit builder =>
      import GraphDSL.Implicits._
      // The lambda must name its argument (`file =>`); without it `file` is
      // unbound and the block does not compile.
      // NOTE(review): the scala.io.Source handle is never closed explicitly.
      val A = builder.add(Flow[File]
        .flatMapConcat { file =>
          Source.fromIterator(() =>
            scala.io.Source.fromFile(file, "UTF-8").getLines())
        }
      )
      val B = builder.add(doQuery)
      val C = builder.add(analyzeResult)
      A ~> B ~> C
      FlowShape(A.in, C.out)
  }
)
// Single source: the list of files to process.
val files = Source(fileNames)
// Run files -> g -> Sink.ignore and keep the sink's completion future.
val done: Future[Done] = files.via(g).runWith(Sink.ignore)
// Execution context used to run the completion callback.
implicit val ec: scala.concurrent.ExecutionContextExecutor = system.dispatcher
// Shut the actor system down once the stream finishes, whether it succeeded or failed.
done.onComplete { _ => system.terminate() }