我一直在尝试使用 Akka Streams 和 Qpid 消息代理创建一个运行递归任务(爬虫,crawler)的应用程序。我注意到,流处理图(graph)的各个部分单独运行时性能都很好,但连接在一起后,性能显著下降。以下是该图在本地计算机上运行时的统计信息:
管道的源代码可以在这里找到: https://gist.github.com/volisoft/3617824b16a3f3b6e01c933a8bdf8049
管道很简单:
/**
 * Entry point: starts the broker, builds the crawl graph, runs it, and seeds it with `rootUrl`.
 *
 * The graph forms a feedback loop through a single AMQP queue: each URL read from the
 * queue is passed through `saveVisited`, requested via the HTTP super pool, handed to
 * `parse`, expanded into further URLs with `getUrls`, filtered by `notVisited`, and
 * published back to the same queue.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  startBroker()

  val queueName = "amqp-conn-it-spec-simple-queue-" + System.currentTimeMillis()
  val queueDeclaration = QueueDeclaration(queueName)
  // Source and sink talk to the same broker, so the connection details are built once.
  val connection = AmqpConnectionDetails("localhost", 5672, Some(AmqpCredentials("guest", "guest")))

  // Messages read from the queue, decoded to URL strings.
  val in = AmqpSource(
    NamedQueueSourceSettings(connection, queueName).withDeclarations(queueDeclaration),
    bufferSize = 1028
  ).map(_.bytes.utf8String).log(":in")

  // Publishes to the same queue the source reads from (routing key == queue name).
  val out = AmqpSink.simple(
    AmqpSinkSettings(connection)
      .withRoutingKey(queueName)
      .withDeclarations(queueDeclaration)
  )

  val urlsSink = Flow[String].map(ByteString(_)).to(out)

  val g = RunnableGraph.fromGraph(GraphDSL.create(in, urlsSink)((_, _)) { implicit b => (inShape, urlsSinkShape) =>
    import GraphDSL.Implicits._

    val pool = Http().superPool[String]()(materializer).log(":pool")

    val download: Flow[String, Document, NotUsed] = Flow[String]
      .map(url => (HttpRequest(method = HttpMethods.GET, uri = Uri(url)), url))
      .via(pool)
      // The pool emits a Try per request. Keep only successful responses: matching on
      // Success alone inside mapAsyncUnordered would throw a MatchError on the first
      // failed request and tear down the whole graph. Every pool result still passes
      // through the ":pool" log stage above before it reaches this point.
      .collect { case (Success(response), url) => (response, url) }
      .mapAsyncUnordered(8) { case (response, url) => parse(response, url) }

    val filter = Flow[String].filter(notVisited).log(":filter")
    val save = Flow[String].map(saveVisited)
    val extractLinks: Flow[Document, String, NotUsed] =
      Flow[Document].mapConcat(document => getUrls(document))

    inShape ~> save ~> download ~> extractLinks ~> filter ~> urlsSinkShape
    ClosedShape
  })

  g.run()

  // Seed the loop: publish the root URL to the queue the graph consumes from.
  Source.single(rootUrl).map(s => ByteString(s)).runWith(out)
}
如何优化此代码以提高性能?