当我尝试在 Spark 上本地运行 Java 程序时出现了错误。以下是我的代码:
import org.apache.log4j.Logger;
import org.apache.log4j.BasicConfigurator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.*;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka.*;
import scala.Tuple2;
import scala.Tuple4;
import scala.Tuple5;
import java.util.*;
public class SentimentAnalysis
{
private final Logger LOG = Logger.getLogger(this.getClass());
private static final String KAFKA_TOPIC = Properties.getString("rts.spark.kafka_topic");
private static final int KAFKA_PARALLELIZATION =
Properties.getInt("rts.spark.kafka_parallelization");
public static void main(String[] args)
{
BasicConfigurator.configure();
SparkConf conf = new SparkConf()
.setAppName("Twitter Sentiment Analysis");
if (args.length > 0)
conf.setMaster(args[0]);
else
conf.setMaster("local[2]");
JavaStreamingContext ssc = new JavaStreamingContext(conf,new Duration(2000));
Map<String, Integer> topicMap = new HashMap<String, Integer>();
topicMap.put(KAFKA_TOPIC, KAFKA_PARALLELIZATION);
JavaPairReceiverInputDStream<String, String> messages =
KafkaUtils.createStream(
ssc,
Properties.getString("rts.spark.zkhosts"),
"twitter.sentimentanalysis.kafka",
topicMap);
以下是 rts.kafka.properties 配置文件的内容:
rts.spark.zkhosts: node0,node2,node4
rts.spark.webserv: http://node0:3000/post
rts.spark.kafka_topic: twitter.tweet
rts.spark.kafka_parallelization: 4
rts.spark.hdfs_output_dir: hdfs://node0/user/spark/${rts.spark.kafka_topic}
rts.spark.hdfs_output_file: ${rts.spark.hdfs_output_dir}/scores
以上就是 rts.kafka.properties 的内容。下面是 pom.xml 中的依赖项:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.0.2</version>
<exclusions>
<exclusion>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.1.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.1.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.2.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.2.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-configuration</groupId>
<artifactId>commons-configuration</artifactId>
<version>1.9</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.10.4</version>
</dependency>
我已经尝试过搜索所有相关内容,但还没有找到解决办法。我是新手,请帮忙。我正在尝试接收来自 Kafka 主题的消息,通过 Spark Streaming 处理后将其存储到 HDFS 中。上面的代码来自我参考的教程链接。
这里是所有依赖项(即上文 pom.xml 中列出的 dependency 片段)。