Error when consuming from a Kafka topic using JavaStreamingContext with a SparkConf object

Time: 2017-03-14 10:05:49

Tags: java apache-spark spark-streaming

I am getting an error when I try to run the Java program locally on Spark. Following is the code:
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.*;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka.*;

import scala.Tuple2;
import scala.Tuple4;
import scala.Tuple5;

import java.util.*;

public class SentimentAnalysis
{
    private final Logger LOG = Logger.getLogger(this.getClass());

    // Properties is a custom helper class (not java.util.Properties) that reads rts.kafka.properties (shown below)
    private static final String KAFKA_TOPIC = Properties.getString("rts.spark.kafka_topic");
    private static final int KAFKA_PARALLELIZATION = Properties.getInt("rts.spark.kafka_parallelization");

    public static void main(String[] args)
    {
        BasicConfigurator.configure();

        SparkConf conf = new SparkConf()
                         .setAppName("Twitter Sentiment Analysis");

        if (args.length > 0)
            conf.setMaster(args[0]);
        else
            conf.setMaster("local[2]");

        JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

        // Consume KAFKA_TOPIC with KAFKA_PARALLELIZATION consumer threads
        Map<String, Integer> topicMap = new HashMap<String, Integer>();
        topicMap.put(KAFKA_TOPIC, KAFKA_PARALLELIZATION);

        // Receiver-based Kafka stream of (key, message) pairs
        JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(
                ssc,
                Properties.getString("rts.spark.zkhosts"),
                "twitter.sentimentanalysis.kafka",
                topicMap);

        // ... (rest of the program omitted in the question)
    }
}

This is rts.kafka.properties:

rts.spark.zkhosts: node0,node2,node4
rts.spark.webserv: http://node0:3000/post
rts.spark.kafka_topic: twitter.tweet
rts.spark.kafka_parallelization: 4
rts.spark.hdfs_output_dir: hdfs://node0/user/spark/${rts.spark.kafka_topic}
rts.spark.hdfs_output_file: ${rts.spark.hdfs_output_dir}/scores
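The Properties class used in the code above (via Properties.getString / Properties.getInt) is not shown in the question; it is presumably a small custom wrapper around this file. Below is a minimal sketch of what such a helper might look like, assuming it is backed by Apache Commons Configuration (which appears in the dependency list further down and supports the ${...} placeholders used in this file); apart from the class name and the file name, everything here is an assumption rather than the original code.

// Hypothetical sketch, not the original helper: a static wrapper around
// Apache Commons Configuration that loads rts.kafka.properties from the classpath.
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;

public final class Properties {

    private static final PropertiesConfiguration CONFIG;

    static {
        try {
            // PropertiesConfiguration also resolves placeholders such as ${rts.spark.kafka_topic}.
            CONFIG = new PropertiesConfiguration("rts.kafka.properties");
        } catch (ConfigurationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    private Properties() {
    }

    public static String getString(String key) {
        return CONFIG.getString(key);
    }

    public static int getInt(String key) {
        return CONFIG.getInt(key);
    }
}

With a helper like this on the classpath, Properties.getString("rts.spark.zkhosts") in SentimentAnalysis would return node0,node2,node4.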

Here are all the dependencies from the pom.xml:

<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.10</artifactId>
  <version>1.0.2</version>
  <exclusions>
    <exclusion>
      <groupId>commons-httpclient</groupId>
      <artifactId>commons-httpclient</artifactId>
    </exclusion>
  </exclusions>
</dependency>

<dependency>
  <groupId>log4j</groupId>
  <artifactId>log4j</artifactId>
  <version>1.2.17</version>
</dependency>
<dependency>
  <groupId>com.fasterxml.jackson.core</groupId>
  <artifactId>jackson-databind</artifactId>
  <version>2.4.1</version>
</dependency>
<dependency>
  <groupId>org.apache.httpcomponents</groupId>
  <artifactId>httpcore</artifactId>
  <version>4.1.3</version>
  <scope>provided</scope>
</dependency>
<dependency>
  <groupId>org.apache.httpcomponents</groupId>
  <artifactId>httpclient</artifactId>
  <version>4.1.3</version>
  <scope>provided</scope>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming_2.10</artifactId>
  <version>1.0.2</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming-kafka_2.10</artifactId>
  <version>1.0.2</version>
</dependency>


<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-core</artifactId>
  <version>0.20.2</version>
</dependency>


<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.2.0</version>
  <exclusions>
    <exclusion>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
    </exclusion>
    <exclusion>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
    </exclusion>
    <exclusion>
      <groupId>commons-httpclient</groupId>
      <artifactId>commons-httpclient</artifactId>
    </exclusion>
  </exclusions>
  <scope>provided</scope>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>2.2.0</version>
  <exclusions>
    <exclusion>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
    </exclusion>
    <exclusion>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
    </exclusion>
  </exclusions>
  <scope>provided</scope>
</dependency>
<dependency>
  <groupId>commons-configuration</groupId>
  <artifactId>commons-configuration</artifactId>
  <version>1.9</version>
  <exclusions>
    <exclusion>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
    </exclusion>
  </exclusions>
</dependency>
<dependency>
  <groupId>org.scala-lang</groupId>
  <artifactId>scala-library</artifactId>
  <version>2.10.4</version>
</dependency>

I have tried searching everywhere but have not found anything yet. I am new to this, so please help. I am trying to receive messages from a Kafka topic, stream them through Spark Streaming, and then store them in HDFS. This is the link from where I have taken the code.
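For reference, here is a minimal, self-contained sketch of that receive-from-Kafka-and-write-to-HDFS pipeline, built on the same receiver-based KafkaUtils.createStream API as the code above. The class name, the hard-coded ZooKeeper hosts, topic and output path (copied from rts.kafka.properties), and the per-batch output directories are illustrative assumptions, not the original program.

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import scala.Tuple2;

public class KafkaToHdfsSketch {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf()
                .setAppName("Kafka to HDFS sketch")
                .setMaster("local[2]");                       // local mode, as in the question
        JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

        // Receiver-based Kafka source, same as in the question.
        Map<String, Integer> topicMap = new HashMap<String, Integer>();
        topicMap.put("twitter.tweet", 4);
        JavaPairReceiverInputDStream<String, String> messages =
                KafkaUtils.createStream(ssc, "node0,node2,node4",
                        "twitter.sentimentanalysis.kafka", topicMap);

        // Keep only the message payload (the Kafka record value).
        JavaDStream<String> tweets = messages.map(new Function<Tuple2<String, String>, String>() {
            public String call(Tuple2<String, String> kv) {
                return kv._2();
            }
        });

        // Write every micro-batch to a time-stamped directory under the HDFS output dir.
        final String outputDir = "hdfs://node0/user/spark/twitter.tweet";
        tweets.foreachRDD(new Function2<JavaRDD<String>, Time, Void>() {
            public Void call(JavaRDD<String> rdd, Time time) {
                rdd.saveAsTextFile(outputDir + "/" + time.milliseconds());
                return null;
            }
        });

        ssc.start();
        ssc.awaitTermination();
    }
}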


0 Answers:

No answers