I'm having a problem sending messages from my Kafka producer to HBase via Spark Streaming.
I can read the messages correctly, but I get an error as soon as I try to put them into HBase.
My code:
package fr.sf.poc.cdc;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka010.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;

public class TestKafka {

    public static void main(String[] args) throws Exception {
        SparkConf sparkConf = new SparkConf().setAppName("JavaKafka");
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

        // Kafka consumer configuration
        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "xxxxxxxxx:6667");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "my_cons_group");
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        Collection<String> topics = Arrays.asList("sparktest");

        final JavaInputDStream<ConsumerRecord<String, String>> stream =
            KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams)
            );

        stream.foreachRDD(new VoidFunction<JavaRDD<ConsumerRecord<String, String>>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(JavaRDD<ConsumerRecord<String, String>> rdd) {
                rdd.foreachPartition(new VoidFunction<Iterator<ConsumerRecord<String, String>>>() {
                    @Override
                    public void call(Iterator<ConsumerRecord<String, String>> consumerRecords) throws IOException {
                        // One HBase connection per partition, created on the executor
                        String tableNameStr = "sentelab_cdc:test";
                        Configuration hbaseConf = HBaseConfiguration.create();
                        Connection hBase = ConnectionFactory.createConnection(hbaseConf);
                        Table table = hBase.getTable(TableName.valueOf(tableNameStr));
                        while (consumerRecords.hasNext()) {
                            ConsumerRecord<String, String> record = consumerRecords.next();
                            System.out.println("--" + record.value() + "--");
                            String key = record.key();
                            if (key == null) { // no id sent by the producer
                                key = record.value().split(" ")[0];
                            }
                            Put p = new Put(key.getBytes(Charset.forName("UTF-8")));
                            p.addColumn("test".getBytes(Charset.forName("UTF-8")),
                                    "message".getBytes(Charset.forName("UTF-8")),
                                    record.value().getBytes(Charset.forName("UTF-8")));
                            table.put(p); // this is where the exception is thrown
                        }
                        table.close();
                        hBase.close();
                    }
                });
            }
        });

        jssc.start();
        jssc.awaitTermination();
    }
}
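
One thing I am not sure about is whether hbase-site.xml is actually visible on the executor classpath; if it isn't, HBaseConfiguration.create() would fall back to default values there. As a minimal sketch of what I could set explicitly instead (the host names and the znode parent below are placeholders, not my real values):

// Sketch only: explicit client-side ZooKeeper settings, assuming
// hbase-site.xml is not picked up on the executors.
// Host names and znode parent are placeholders.
Configuration hbaseConf = HBaseConfiguration.create();
hbaseConf.set("hbase.zookeeper.quorum", "zk-host1,zk-host2,zk-host3");
hbaseConf.set("hbase.zookeeper.property.clientPort", "2181");
hbaseConf.set("zookeeper.znode.parent", "/hbase-unsecure"); // "/hbase" by default
Connection hBase = ConnectionFactory.createConnection(hbaseConf);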
I start my Spark Streaming job, and then my Kafka producer as follows:
./kafka-console-producer.sh --broker-list xxxxxxxxxx:6667 --topic sparktest
I type a sentence like "hello world", and then my Spark Streaming job gives me this error:
Hbase Connection successful
17/04/07 11:58:10 INFO CachedKafkaConsumer: Initial fetch for spark-executor-my_cons_group sparktest 0 260
17/04/07 11:58:10 INFO AbstractCoordinator: Discovered coordinator xxxxxxxxxxxxxx:6667 (id: 2147482646 rack: null) for group spark-executor-my_cons_group.
-key-hello--
-message-hello world--
17/04/07 11:58:10 ERROR Executor: Exception in task 0.0 in stage 8.0 (TID 8)
java.lang.RuntimeException: java.lang.NullPointerException
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:208)
at org.apache.hadoop.hbase.client.ClientSmallReversedScanner.loadCache(ClientSmallReversedScanner.java:212)
at org.apache.hadoop.hbase.client.ClientSmallReversedScanner.next(ClientSmallReversedScanner.java:186)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegionInMeta(ConnectionManager.java:1275)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1181)
at org.apache.hadoop.hbase.client.AsyncProcess.submit(AsyncProcess.java:395)
at org.apache.hadoop.hbase.client.AsyncProcess.submit(AsyncProcess.java:344)
at org.apache.hadoop.hbase.client.BufferedMutatorImpl.backgroundFlushCommits(BufferedMutatorImpl.java:238)
at org.apache.hadoop.hbase.client.BufferedMutatorImpl.flush(BufferedMutatorImpl.java:190)
at org.apache.hadoop.hbase.client.HTable.flushCommits(HTable.java:1422)
at org.apache.hadoop.hbase.client.HTable.put(HTable.java:1013)
at fr.sf.poc.cdc.TestKafka$1$1.call(TestKafka.java:82)
at fr.sf.poc.cdc.TestKafka$1$1.call(TestKafka.java:58)
at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:218)
at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:218)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:902)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:902)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.NullPointerException
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.getMetaReplicaNodes(ZooKeeperWatcher.java:489)
at org.apache.hadoop.hbase.zookeeper.MetaTableLocator.blockUntilAvailable(MetaTableLocator.java:558)
at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getMetaRegionLocation(ZooKeeperRegistry.java:61)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateMeta(ConnectionManager.java:1211)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1178)
at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.getRegionLocations(RpcRetryingCallerWithReadReplicas.java:305)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:156)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:60)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
... 24 more
And I don't understand why I get a NullPointerException... Do you have any idea what could be wrong?
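
For reference, here is a minimal standalone check I could run outside Spark against the same table, to see whether the HBase client works at all with my configuration. This is only a sketch; it assumes the machine running it sees the same hbase-site.xml as the cluster:

// Standalone HBase connectivity check (sketch); table name and column
// family are the ones from my streaming job above.
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

public class HBaseCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("sentelab_cdc:test"))) {
            Put p = new Put("check".getBytes(StandardCharsets.UTF_8));
            p.addColumn("test".getBytes(StandardCharsets.UTF_8),
                    "message".getBytes(StandardCharsets.UTF_8),
                    "hello".getBytes(StandardCharsets.UTF_8));
            table.put(p);
            System.out.println("Put succeeded");
        }
    }
}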