Putting a Kafka message into HBase with Spark Streaming

Asked: 2017-04-07 16:20:43

Tags: java hbase apache-kafka spark-streaming

I have a problem sending messages from my Kafka producer to HBase via Spark Streaming.

Although I can read the messages correctly, I get an error when I try to put them into HBase.

My code:

package fr.sf.poc.cdc;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka010.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;

public class TestKafka 
{

    public static void main( String[] args ) throws Exception {

        SparkConf sparkConf = new SparkConf().setAppName("JavaKafka");
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "xxxxxxxxx:6667");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "my_cons_group");
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);
        Collection<String> topics = Arrays.asList("sparktest");



        final JavaInputDStream<ConsumerRecord<String, String>> stream =
                KafkaUtils.createDirectStream(
                        jssc,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams)
                        );

        stream.foreachRDD(new VoidFunction<JavaRDD<ConsumerRecord<String, String>>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(JavaRDD<ConsumerRecord<String, String>> rdd) {
                rdd.foreachPartition(new VoidFunction<Iterator<ConsumerRecord<String, String>>>() {
                    @Override
                    public void call(Iterator<ConsumerRecord<String, String>> consumerRecords) throws IOException {
                        String tableNameStr = "sentelab_cdc:test";
                        // One HBase connection and table handle per partition
                        Configuration hbaseConf = HBaseConfiguration.create();
                        Connection hBase = ConnectionFactory.createConnection(hbaseConf);
                        Table table = hBase.getTable(TableName.valueOf(tableNameStr));

                        while (consumerRecords.hasNext()){
                            ConsumerRecord<String, String> record = consumerRecords.next();
                            System.out.println("--" +record.value() + "--");
                            String key = record.key();
                            if (key == null) { // no key sent by the producer: use the first word of the value
                                key = record.value().split(" ")[0];
                            }
                            Put p = new Put(key.getBytes(Charset.forName("UTF-8")));
                            p.addColumn("test".getBytes(Charset.forName("UTF-8")),
                                    "message".getBytes(Charset.forName("UTF-8")),
                                    record.value().getBytes(Charset.forName("UTF-8")));
                            table.put(p);

                        }
                    }
                });
            }
        });
        jssc.start();
        jssc.awaitTermination();
    }
}
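One remark on the code before looking at the error: the HBase Connection and Table are opened per partition but never closed, so every batch leaks a connection. A minimal sketch of the per-partition body with try-with-resources (same table, column family, and qualifier as above):

// Sketch only: same put logic as above, but the connection and table
// are closed automatically once the partition has been processed.
Configuration hbaseConf = HBaseConfiguration.create();
try (Connection hBase = ConnectionFactory.createConnection(hbaseConf);
     Table table = hBase.getTable(TableName.valueOf("sentelab_cdc:test"))) {
    while (consumerRecords.hasNext()) {
        ConsumerRecord<String, String> record = consumerRecords.next();
        // fall back to the first word of the value when the producer sent no key
        String key = record.key() != null ? record.key() : record.value().split(" ")[0];
        Put p = new Put(key.getBytes(Charset.forName("UTF-8")));
        p.addColumn("test".getBytes(Charset.forName("UTF-8")),
                "message".getBytes(Charset.forName("UTF-8")),
                record.value().getBytes(Charset.forName("UTF-8")));
        table.put(p);
    }
}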

I start my Spark Streaming job, and then my Kafka producer, as follows:

./kafka-console-producer.sh --broker-list xxxxxxxxxx:6667 --topic sparktest

I type a sentence like "hello world".

My Spark Streaming job then gives me this error:

Hbase Connection successful
17/04/07 11:58:10 INFO CachedKafkaConsumer: Initial fetch for spark-executor-my_cons_group sparktest 0 260
17/04/07 11:58:10 INFO AbstractCoordinator: Discovered coordinator xxxxxxxxxxxxxx:6667 (id: 2147482646 rack: null) for group spark-executor-my_cons_group.
-key-hello--
-message-hello world--
17/04/07 11:58:10 ERROR Executor: Exception in task 0.0 in stage 8.0 (TID 8)
java.lang.RuntimeException: java.lang.NullPointerException
        at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:208)
        at org.apache.hadoop.hbase.client.ClientSmallReversedScanner.loadCache(ClientSmallReversedScanner.java:212)
        at org.apache.hadoop.hbase.client.ClientSmallReversedScanner.next(ClientSmallReversedScanner.java:186)
        at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegionInMeta(ConnectionManager.java:1275)
        at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1181)
        at org.apache.hadoop.hbase.client.AsyncProcess.submit(AsyncProcess.java:395)
        at org.apache.hadoop.hbase.client.AsyncProcess.submit(AsyncProcess.java:344)
        at org.apache.hadoop.hbase.client.BufferedMutatorImpl.backgroundFlushCommits(BufferedMutatorImpl.java:238)
        at org.apache.hadoop.hbase.client.BufferedMutatorImpl.flush(BufferedMutatorImpl.java:190)
        at org.apache.hadoop.hbase.client.HTable.flushCommits(HTable.java:1422)
        at org.apache.hadoop.hbase.client.HTable.put(HTable.java:1013)
        at fr.sf.poc.cdc.TestKafka$1$1.call(TestKafka.java:82)
        at fr.sf.poc.cdc.TestKafka$1$1.call(TestKafka.java:58)
        at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:218)
        at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:218)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:902)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:902)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
        at org.apache.spark.scheduler.Task.run(Task.scala:86)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.NullPointerException
        at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.getMetaReplicaNodes(ZooKeeperWatcher.java:489)
        at org.apache.hadoop.hbase.zookeeper.MetaTableLocator.blockUntilAvailable(MetaTableLocator.java:558)
        at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getMetaRegionLocation(ZooKeeperRegistry.java:61)
        at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateMeta(ConnectionManager.java:1211)
        at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1178)
        at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.getRegionLocations(RpcRetryingCallerWithReadReplicas.java:305)
        at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:156)
        at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:60)
        at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
        ... 24 more

And I don't understand why I am getting a NullPointerException... do you know what might be wrong?
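One thing I suspect is that HBaseConfiguration.create() on the executors does not pick up hbase-site.xml, so the client falls back to the defaults (localhost quorum, /hbase parent znode), which can produce an NPE inside ZooKeeperWatcher.getMetaReplicaNodes. Here is a sketch of what I could set explicitly; the hostnames and the znode parent below are placeholders, not my actual cluster values:

// Sketch only: point the HBase client at ZooKeeper explicitly instead of
// relying on hbase-site.xml being on the executor classpath.
// "zk-host1,zk-host2,zk-host3" and "/hbase-unsecure" are placeholders
// (HDP clusters typically use /hbase-unsecure; vanilla HBase uses /hbase).
Configuration hbaseConf = HBaseConfiguration.create();
hbaseConf.set("hbase.zookeeper.quorum", "zk-host1,zk-host2,zk-host3");
hbaseConf.set("hbase.zookeeper.property.clientPort", "2181");
hbaseConf.set("zookeeper.znode.parent", "/hbase-unsecure");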

0 Answers:

No answers yet.