Cannot resolve symbol JavaSparkSessionSingleton

Date: 2017-05-05 03:09:58

Tags: java maven apache-spark apache-kafka spark-streaming

I am new to Spark Streaming. What I am trying to achieve is to read JSON string data from Kafka, store it in a DStream, and convert it into a Dataset so that it can be loaded into Elasticsearch. I am using part of the code from this post.

Here is the actual code:

            import org.apache.spark.SparkConf;
            import org.apache.spark.api.java.JavaRDD;
            import org.apache.spark.api.java.JavaSparkContext;
            import org.apache.spark.api.java.function.VoidFunction;
            import org.apache.spark.sql.*;
            import org.apache.spark.sql.streaming.StreamingQuery;
            import org.apache.spark.sql.streaming.StreamingQueryException;
            import org.apache.spark.sql.types.DataTypes;
            import org.apache.spark.sql.types.StructField;
            import org.apache.spark.sql.types.StructType;
            import org.apache.spark.streaming.Duration;
            import org.apache.spark.sql.SparkSession;
            import org.apache.spark.streaming.api.java.JavaDStream;
            import org.apache.spark.streaming.api.java.JavaPairInputDStream;
            import org.apache.spark.streaming.api.java.JavaStreamingContext;
            import org.apache.spark.streaming.kafka.KafkaUtils;

            import org.apache.spark.sql.Dataset;
            import org.apache.spark.sql.Row;

            import java.util.Collections;
            import java.util.HashMap;
            import java.util.Map;
            import java.util.Set;
            import org.apache.spark.api.java.function.Function;


            import kafka.serializer.StringDecoder;
            import scala.Tuple2;

            public class SparkConsumer {

                    public static void main(String[] args) throws InterruptedException {

                        SparkConf conf = new SparkConf().setAppName("readKafkajson").setMaster("local[*]");

                        JavaSparkContext sc = new JavaSparkContext(conf);

                        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

                        // TODO: processing pipeline
                        Map<String, String> kafkaParams = new HashMap<String, String>();
                        kafkaParams.put("metadata.broker.list", "localhost:9092");
                        Set<String> topics = Collections.singleton("kafkajson");

                        JavaPairInputDStream<String, String> directKafkaStream =
                                KafkaUtils.createDirectStream(ssc, String.class, String.class, StringDecoder.class,
                                        StringDecoder.class, kafkaParams, topics);

                        JavaDStream<String> json = directKafkaStream.map(new Function<Tuple2<String,String>, String>() {
                            public String call(Tuple2<String,String> message) throws Exception {
                                System.out.println(message._2());
                                return message._2();
                            };
                        });

                        System.out.println(" json is  0------ 0"+ json);

                        json.foreachRDD(rdd -> {
                            rdd.foreach(
                                    record -> System.out.println(record));
                        });

                        //Create JavaRDD<Row>
                        json.foreachRDD(new VoidFunction<JavaRDD<String>>() {
                            @Override
                            public void call(JavaRDD<String> rdd) {
                                JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
                                    @Override
                                    public Row call(String msg) {
                                        Row row = RowFactory.create(msg);
                                        return row;
                                    }
                                });
                                //Create Schema
                                StructType schema = DataTypes.createStructType(new StructField[] {DataTypes.createStructField("Message", DataTypes.StringType, true)});
                                //Get Spark 2.0 session
                                SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
                                Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
                                msgDataFrame.show();
                            }
                        });

                        ssc.start();
                        ssc.awaitTermination();

                    }
                }

I am getting an error message saying that the symbol JavaSparkSessionSingleton cannot be resolved.

I am using Spark 2.0.1, and my Maven dependencies look like this:

             <dependency>
                        <groupId>org.apache.spark</groupId>
                        <artifactId>spark-core_2.11</artifactId>
                        <version>2.0.1</version>
                    </dependency>
                    <dependency>
                        <groupId>org.apache.spark</groupId>
                        <artifactId>spark-streaming_2.11</artifactId>
                        <version>2.0.1</version>
                    </dependency>
                     <dependency>
                        <groupId>org.apache.spark</groupId>
                        <artifactId>spark-streaming-kafka_2.11</artifactId>
                        <version>1.6.3</version>
                    </dependency>
                    <dependency>
                        <groupId>org.apache.spark</groupId>
                        <artifactId>spark-sql_2.11</artifactId>
                        <version>2.0.1</version>
                    </dependency>

I am not sure what I am missing. Any help is appreciated.

1 Answer:

Answer 0 (score: 0):

The official Spark documentation has you create a singleton class to hold your session. Add it at the bottom of your file, alongside your SparkConsumer class:

/** Lazily instantiated singleton instance of SparkSession. */
class JavaSparkSessionSingleton {
  private static transient SparkSession instance = null;
  public static SparkSession getInstance(SparkConf sparkConf) {
    if (instance == null) {
      instance = SparkSession
        .builder()
        .config(sparkConf)
        .getOrCreate();
    }
    return instance;
  }
}
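
The getOrCreate() call returns the SparkSession that has already been built if one exists, so the singleton hands back the same session on every micro-batch instead of constructing a new one, while still picking up the SparkConf of the running context.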

This comes from the Spark examples; the full example is here: https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java
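
With that class added to the project, the foreachRDD block from the question should compile. As a rough sketch of how it then ties together (this simply restates the question's own foreachRDD body with the singleton resolved; the json stream, the Message column, and the schema all come from the question):

json.foreachRDD(rdd -> {
    // Reuse (or lazily create) the one SparkSession for this JVM
    SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());

    // Wrap each raw Kafka message in a single-column Row
    JavaRDD<Row> rowRDD = rdd.map(msg -> RowFactory.create(msg));

    StructType schema = DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField("Message", DataTypes.StringType, true)});

    // Build a DataFrame for this micro-batch and print it
    Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
    msgDataFrame.show();
});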