I built Spark with Scala 2.11. I performed the following steps:

./dev/change-scala-version.sh 2.11
mvn -Pyarn -Phadoop-2.4 -Dscala-2.11 -DskipTests clean package
After Spark built successfully, I tried to initialize Spark through an Akka actor. My Main class looks like this:
ActorSystem system = ActorSystem.create("ClusterSystem");
Inbox inbox = Inbox.create(system);
ActorRef sparkActorRef = system.actorOf(SparkActor.props(mapOfArguments), "sparkActor");
inbox.send(sparkActorRef, "start");
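For completeness: since the actor below subscribes through DistributedPubSub, the ActorSystem has to run with the cluster actor-ref provider. A minimal sketch of the kind of configuration this needs, done programmatically rather than in application.conf, with placeholder host/port/seed-node values rather than my exact settings:

import akka.actor.ActorSystem;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

// Illustrative sketch only: DistributedPubSub requires the cluster
// provider, whether set here or in application.conf. The hostname,
// port and seed node below are placeholders.
Config clusterConfig = ConfigFactory.parseString(
        "akka.actor.provider = \"akka.cluster.ClusterActorRefProvider\"\n"
        + "akka.remote.netty.tcp.hostname = \"127.0.0.1\"\n"
        + "akka.remote.netty.tcp.port = 2551\n"
        + "akka.cluster.seed-nodes = [\"akka.tcp://ClusterSystem@127.0.0.1:2551\"]")
        .withFallback(ConfigFactory.load());
ActorSystem system = ActorSystem.create("ClusterSystem", clusterConfig);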
And the Spark actor looks like this:
public class SparkActor extends UntypedActor {

    private static final Logger logger = LoggerFactory.getLogger(SparkActor.class);

    final Map<String, Object> configurations;
    final SparkConf sparkConf;
    private int sparkBatchDuration;

    public static Props props(final Map<String, Object> configurations) {
        return Props.create(new Creator<SparkActor>() {
            private static final long serialVersionUID = 1L;

            @Override
            public SparkActor create() throws Exception {
                return new SparkActor(configurations);
            }
        });
    }

    public SparkActor(Map<String, Object> configurations) {
        this.configurations = configurations;
        this.sparkConf = initializeSparkConf(configurations);
        ActorRef mediator = DistributedPubSub.get(getContext().system()).mediator();
        mediator.tell(new DistributedPubSubMediator.Subscribe("data", getSelf()), getSelf());
    }

    private SparkConf initializeSparkConf(Map<String, Object> mapOfArgs) {
        SparkConf conf = new SparkConf();
        Configuration sparkConfiguration = (Configuration) mapOfArgs.get(StreamingConstants.MAP_SPARK_CONFIGURATION);
        Iterator it = sparkConfiguration.getKeys();
        while (it.hasNext()) {
            String propertyKey = (String) it.next();
            String propertyValue = sparkConfiguration.getString(propertyKey);
            conf.set(propertyKey.trim(), propertyValue.trim());
        }
        conf.setMaster(sparkConfiguration.getString(StreamingConstants.SET_MASTER));
        return conf;
    }

    @Override
    public void onReceive(Object arg0) throws Exception {
        if ((arg0 instanceof String) && arg0.toString().equalsIgnoreCase("start")) {
            logger.info("Going to start");
            sparkConf.setAppName(StreamingConstants.APP_NAME);
            logger.debug("App name set to {}. Beginning spark execution", StreamingConstants.APP_NAME);
            Configuration kafkaConfiguration = (Configuration) configurations.get(StreamingConstants.MAP_KAFKA_CONFIGURATION);
            sparkBatchDuration = Integer.parseInt((String) configurations.get(StreamingConstants.MAP_SPARK_DURATION));

            // Initializing Kafka configurations: parse topic/thread-count pairs.
            String[] eplTopicsAndThreads = kafkaConfiguration.getString(StreamingConstants.EPL_QUEUE).split(",");
            Map<String, Integer> mapOfEplTopicsAndThreads = new TreeMap<>();
            for (String item : eplTopicsAndThreads) {
                String topic = item.split(StreamingConstants.EPL_QUEUE_SEPARATOR)[0];
                Integer numberOfThreads = Integer.parseInt(item.split(StreamingConstants.EPL_QUEUE_SEPARATOR)[1]);
                mapOfEplTopicsAndThreads.put(topic, numberOfThreads);
            }

            // Creating a receiver stream in Spark.
            JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(sparkBatchDuration));
            JavaPairReceiverInputDStream<String, String> receiverStream = KafkaUtils.createStream(ssc,
                    kafkaConfiguration.getString(StreamingConstants.ZOOKEEPER_SERVER_PROPERTY),
                    kafkaConfiguration.getString(StreamingConstants.KAFKA_GROUP_NAME),
                    mapOfEplTopicsAndThreads);
            JavaDStream<String> javaRdd = receiverStream.map(new SparkTaskTupleHelper());
            javaRdd.foreachRDD(new Function<JavaRDD<String>, Void>() {
                @Override
                public Void call(JavaRDD<String> jsonData) throws Exception {
                    // Code to process some data from Kafka.
                    return null;
                }
            });
            ssc.start();
            ssc.awaitTermination();
        }
    }
}
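(Unrelated to the crash, but worth noting: the mediator answers the Subscribe message with a SubscribeAck, which onReceive above silently drops. A small sketch of handling it, placed before the "start" check:)

// Sketch: acknowledge the mediator's subscription confirmation.
if (arg0 instanceof DistributedPubSubMediator.SubscribeAck) {
    logger.info("Subscribed to topic 'data'");
    return;
}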
I start my Spark application with:

./spark-submit --class com.sample.Main --master local[8] ../executables/spark-akka.jar

and I get the following exception at startup:
Uncaught error from thread [ClusterSystem-akka.actor.default-dispatcher-3] shutting down JVM since 'akka.jvm-exit-on-fatal-error' is enabled for ActorSystem[ClusterSystem]
java.lang.NoSuchMethodError: scala.Predef$.ArrowAssoc(Ljava/lang/Object;)Ljava/lang/Object;
at akka.cluster.pubsub.protobuf.DistributedPubSubMessageSerializer.<init>(DistributedPubSubMessageSerializer.scala:42)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at akka.actor.ReflectiveDynamicAccess$$anonfun$createInstanceFor$2.apply(DynamicAccess.scala:78)
at scala.util.Try$.apply(Try.scala:161)
at akka.actor.ReflectiveDynamicAccess.createInstanceFor(DynamicAccess.scala:73)
at akka.actor.ReflectiveDynamicAccess$$anonfun$createInstanceFor$3.apply(DynamicAccess.scala:84)
at akka.actor.ReflectiveDynamicAccess$$anonfun$createInstanceFor$3.apply(DynamicAccess.scala:84)
at scala.util.Success.flatMap(Try.scala:200)
at akka.actor.ReflectiveDynamicAccess.createInstanceFor(DynamicAccess.scala:84)
at akka.serialization.Serialization.serializerOf(Serialization.scala:165)
at akka.serialization.Serialization$$anonfun$3.apply(Serialization.scala:174)
at akka.serialization.Serialization$$anonfun$3.apply(Serialization.scala:174)
at scala.collection.TraversableLike$WithFilter$$anonfun$map$2.apply(TraversableLike.scala:722)
at scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:224)
at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:403)
at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:403)
at scala.collection.TraversableLike$WithFilter.map(TraversableLike.scala:721)
at akka.serialization.Serialization.<init>(Serialization.scala:174)
at akka.serialization.SerializationExtension$.createExtension(SerializationExtension.scala:15)
at akka.serialization.SerializationExtension$.createExtension(SerializationExtension.scala:12)
at akka.actor.ActorSystemImpl.registerExtension(ActorSystem.scala:713)
at akka.actor.ExtensionId$class.apply(Extension.scala:79)
at akka.serialization.SerializationExtension$.apply(SerializationExtension.scala:12)
at akka.remote.RemoteActorRefProvider.init(RemoteActorRefProvider.scala:175)
at akka.actor.ActorSystemImpl.liftedTree2$1(ActorSystem.scala:620)
at akka.actor.ActorSystemImpl._start$lzycompute(ActorSystem.scala:617)
at akka.actor.ActorSystemImpl._start(ActorSystem.scala:617)
at akka.actor.ActorSystemImpl.start(ActorSystem.scala:634)
at akka.actor.ActorSystem$.apply(ActorSystem.scala:142)
at akka.actor.ActorSystem$.apply(ActorSystem.scala:119)
at org.apache.spark.util.AkkaUtils$.org$apache$spark$util$AkkaUtils$$doCreateActorSystem(AkkaUtils.scala:121)
at org.apache.spark.util.AkkaUtils$$anonfun$1.apply(AkkaUtils.scala:53)
at org.apache.spark.util.AkkaUtils$$anonfun$1.apply(AkkaUtils.scala:52)
at org.apache.spark.util.Utils$$anonfun$startServiceOnPort$1.apply$mcVI$sp(Utils.scala:1913)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
at org.apache.spark.util.Utils$.startServiceOnPort(Utils.scala:1904)
at org.apache.spark.util.AkkaUtils$.createActorSystem(AkkaUtils.scala:55)
at org.apache.spark.rpc.akka.AkkaRpcEnvFactory.create(AkkaRpcEnv.scala:253)
at org.apache.spark.rpc.RpcEnv$.create(RpcEnv.scala:53)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:252)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:193)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:450)
at org.apache.spark.streaming.StreamingContext$.createNewSparkContext(StreamingContext.scala:864)
at org.apache.spark.streaming.StreamingContext.<init>(StreamingContext.scala:81)
at org.apache.spark.streaming.api.java.JavaStreamingContext.<init>(JavaStreamingContext.scala:134)
at com.sample.SparkActor.onReceive(SparkActor.java:106)
at akka.actor.UntypedActor$$anonfun$receive$1.applyOrElse(UntypedActor.scala:167)
at akka.actor.Actor$class.aroundReceive(Actor.scala:467)
at akka.actor.UntypedActor.aroundReceive(UntypedActor.scala:97)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
at akka.dispatch.Mailbox.run(Mailbox.scala:220)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
A list of options I have already tried:

1) Rebuilt Spark with Akka version 2.4.4 and got a NoSuchMethodError for toRootLowerCase
2) Tried to reuse the stock Spark build (with Akka 2.3.11) and still got the same exception, at ClusterSettings.scala
I looked up similar errors on Stack Overflow and found that they are caused by a Scala version mismatch. But since I built everything against Scala 2.11 with Akka 2.4.4, I assumed all the jars would be on the same Scala version.
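To check that assumption at runtime, a quick diagnostic (not part of the application; runnable from a main method that declares throws Exception and is launched through spark-submit) could print which jars the conflicting classes are actually loaded from:

// Diagnostic sketch: report the Scala runtime version and the code
// sources of scala.Predef and the failing Akka serializer class.
System.out.println("Scala version: " + scala.util.Properties.versionNumberString());
System.out.println("scala.Predef loaded from: "
        + scala.Predef.class.getProtectionDomain().getCodeSource().getLocation());
Class<?> serializerClass = Class.forName(
        "akka.cluster.pubsub.protobuf.DistributedPubSubMessageSerializer");
System.out.println("Serializer loaded from: "
        + serializerClass.getProtectionDomain().getCodeSource().getLocation());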
Have I missed any particular step?
My pom file, for reference:
<packaging>jar</packaging>

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <slf4j.version>1.7.6</slf4j.version>
    <log4j.version>2.0-rc1</log4j.version>
    <commons.cli.version>1.2</commons.cli.version>
    <kafka.version>0.8.2.2</kafka.version>
    <akka.version>2.4.4</akka.version>
    <akka.version.old>2.4.4</akka.version.old>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>1.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>1.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.11</artifactId>
        <version>1.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.11</artifactId>
        <version>1.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>2.11.8</version>
    </dependency>
    <dependency>
        <groupId>com.typesafe.akka</groupId>
        <artifactId>akka-actor_2.11</artifactId>
        <version>${akka.version}</version>
    </dependency>
    <dependency>
        <groupId>com.typesafe.akka</groupId>
        <artifactId>akka-cluster_2.11</artifactId>
        <version>${akka.version}</version>
    </dependency>
    <dependency>
        <groupId>com.typesafe.akka</groupId>
        <artifactId>akka-kernel_2.11</artifactId>
        <version>${akka.version}</version>
    </dependency>
    <dependency>
        <groupId>com.typesafe.akka</groupId>
        <artifactId>akka-cluster-tools_2.11</artifactId>
        <version>${akka.version}</version>
    </dependency>
    <dependency>
        <groupId>com.typesafe.akka</groupId>
        <artifactId>akka-remote_2.11</artifactId>
        <version>2.4.4</version>
    </dependency>
    <dependency>
        <groupId>com.typesafe.akka</groupId>
        <artifactId>akka-slf4j_2.11</artifactId>
        <version>2.4.4</version>
    </dependency>
</dependencies>
If I remove the cluster jars and the DistributedPubSub code and use plain remoting instead (akka.tcp), no error shows up and everything works fine in that case. I would like to know why DistributedPubSub throws this error.