我有一个 Scala 项目,用于模拟一个国家的经济。我编写了一个消息传递系统,模拟中的个体(代理)可以向环境发送消息并接收环境的回复。我使用 Spark 按 agentId 并行处理消息的发送。我还编写了一个简单的测试来测量代码执行时间:对于 50 个个体,运行代码大约需要 30 秒。我需要缩短代码执行时间。在这种情况下我该怎么办?哪些方法可以帮助我?
我使用 combineByKey 按每个代理的 ID(作为键)合并发给它的消息。我也考虑过使用 groupByKey、reduceByKey 等。除此之外,我还能采取什么措施来缩短代码执行时间呢?下面是消息传递部分的代码。
/**
 * Advances the simulation tick by tick while `timer <= until`, incrementing `timer`
 * by one after each tick.
 *
 * Each tick runs in one of two modes, selected by `GLOBAL.RUN_SPARK`:
 *
 *  - Spark mode: every agent in `simsSpark` handles its received messages and is stepped
 *    to the current `timer`; the messages the agents emit are grouped per receiver, the
 *    ones addressed to `ENVIRONMENT_ID` are passed through `handleEnvMessage`, and the
 *    resulting per-receiver lists are joined back onto the agents for the next tick.
 *  - Local mode: the same exchange is performed on the in-memory `market` and `sims`
 *    collections, carrying the pending messages in a local list between ticks.
 *
 * @param until last value of `timer` (inclusive) for which a tick is executed
 */
@transient def run_until(until: Int): Unit = {
  println("RESUME Simulation " + this)
  var messages: List[Message] = List()

  // Combiner functions shared by both `combineByKey` calls below; they build one
  // List[Message] per receiver id. Hoisted out of the loop because they never change.
  val startList = (m: Message) => List(m)
  val prepend = (acc: List[Message], m: Message) => m :: acc
  val concat = (left: List[Message], right: List[Message]) => left ::: right

  // loop starts here
  // NOTE(review): `vaxt` is not defined in the visible code. It is used as a block-taking
  // wrapper around the whole loop — presumably a timing helper (e.g. `time { ... }`) whose
  // name was garbled; confirm against the rest of the file.
  vaxt {
    while (timer <= until) {
      if (!GLOBAL.silent) println("timer = " + timer)

      if (GLOBAL.RUN_SPARK) {
        // Agents consume the messages attached by the join at the end of the previous tick,
        // then advance to the current timer value.
        simsSpark = simsSpark.mapValues { s =>
          s.handleMessages()
          s.run_until(timer)._1.asInstanceOf[SimO]
        }.cache()

        // Everything the agents emitted during this tick, grouped by receiver id.
        val emitted: RDD[(AgentId, List[Message])] = simsSpark
          .flatMap(_._2.getMessages)
          .map(x => (x.receiverId, x))
          .combineByKey(startList, prepend, concat)
          .cache()

        // Environment answers immediately for the next step
        val envMessages: RDD[(AgentId, List[Message])] = emitted
          .filter(_._1 == ENVIRONMENT_ID)
          .flatMap(_._2)
          .flatMap(handleEnvMessage)
          .map(x => (x.receiverId, x))
          .combineByKey(startList, prepend, concat)
          .cache()

        // Append environment to messages: important merge both together
        // (groupByKey otherwise elements may be duplicated at join afterwards)
        val inbox: RDD[(AgentId, List[Message])] = emitted
          .filter(_._1 != ENVIRONMENT_ID)
          .union(envMessages)
          .groupByKey()
          .mapValues(_.flatten.toList)
          .cache()

        // Deliver each agent its inbox; agents without messages receive an empty list.
        simsSpark = simsSpark.leftOuterJoin(inbox).mapValues { x =>
          x._1.setReceiveMessages(x._2.getOrElse(List()))
          x._1
        }.persist()

        // NOTE(review): the `foreach` below is the only Spark action in this branch, so with
        // GLOBAL.silent set nothing here forces evaluation inside the loop. The RDDs cached
        // above are also never unpersisted in this method. Both are worth checking when
        // investigating run time and memory use.
        if (!GLOBAL.silent) {
          simsSpark.foreach(_._2.stat)
          println()
          println()
        }
      } else {
        // Split pending messages in a single pass: those addressed to the environment are
        // answered via handleEnvMessage, the rest are delivered unchanged.
        val (toEnvironment, toAgents) = messages.partition(_.receiverId == ENVIRONMENT_ID)
        messages = toEnvironment.flatMap(handleEnvMessage) ::: toAgents
        val mx = messages.groupBy(_.receiverId)
        messages = List()

        market = market.map(m => {
          m._2.setReceiveMessages(mx.getOrElse(m._2.id, List()))
          m._2.handleMessages()
          m
        })
        messages = market.flatMap(_._2.getMessages).toList ::: messages

        sims = sims.map { s =>
          s.setReceiveMessages(mx.getOrElse(s.id, List()))
          s.handleMessages()
          s.run_until(timer)._1.asInstanceOf[SimO]
        }
        messages = sims.flatMap(_.getMessages) ::: messages

        if (!GLOBAL.silent) {
          for (s <- sims) s.stat
          println()
          println()
        }
      }

      timer += 1
    }
  }
  // loop finish here
  println("STOP Simulation " + this)
}