我正在尝试运行 Spark Streaming 的 Kinesis 消费者 wordcount 示例。我的 Kinesis 流中有 5 个分片，并以 local[20] 模式运行 Spark Streaming 消费者进程。我确定流中有消息，因为用一个简单的消费者就能读到它们，但 wordcount 示例的 DStream 中始终只有空的 RDD。有什么建议吗？rdd.toArray 始终打印空结果：
// Debug driver: creates one Kinesis receiver stream per shard, unions them into a single
// DStream, and prints the record count and the decoded records of every batch.

// Spark Streaming batch interval
Duration batchInterval = new Duration(2000);
// Kinesis checkpoint interval. Same as batchInterval for this example.
Duration kinesisCheckpointInterval = batchInterval;
// NOTE(review): each receiver occupies one local thread, so the thread count in
// "local[N]" has to exceed the number of shards for batches to be processed.
JavaStreamingContext ctx = new JavaStreamingContext(
    new SparkConf().setAppName("SparkKinesis").setMaster("local[20]"), batchInterval);

AmazonKinesisClient kinesisClient =
    new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain());
kinesisClient.setRegion(Region.getRegion(Regions.US_WEST_2));

// One receiver stream is created per shard reported by Kinesis.
int numShards = kinesisClient.describeStream(SampleProducer.STREAM_NAME)
    .getStreamDescription()
    .getShards()
    .size();

List<JavaDStream<byte[]>> streamsList = new ArrayList<>(numShards);
for (int i = 0; i < numShards; i++) {
    // NOTE(review): per the Spark Kinesis integration guide, TRIM_HORIZON is only honored
    // when no checkpoint exists yet for this application name ("SparkKinesis"); a leftover
    // checkpoint table from an earlier run makes the receivers resume from it instead.
    streamsList.add(
        KinesisUtils.createStream(
            ctx,
            "SparkKinesis",
            SampleProducer.STREAM_NAME,
            "kinesis.us-west-2.amazonaws.com",
            Region.getRegion(Regions.US_WEST_2).getName(),
            InitialPositionInStream.TRIM_HORIZON,
            kinesisCheckpointInterval,
            StorageLevel.MEMORY_AND_DISK_2()));
}

// Shards are spread across all receivers, so inspecting only streams 0 and 1 can miss
// every record (and get(1) fails outright on a single-shard stream). Union the streams
// so each batch shows the records from all shards.
JavaDStream<byte[]> allShardsStream = streamsList.size() > 1
    ? ctx.union(streamsList.get(0), streamsList.subList(1, streamsList.size()))
    : streamsList.get(0);

allShardsStream.foreachRDD(rdd -> {
    // Decode before collecting: printing a List<byte[]> only shows array identity
    // strings ("[B@..."). Assumes the payload is UTF-8 text — TODO confirm with producer.
    List<String> records = rdd
        .map(bytes -> new String(bytes, java.nio.charset.StandardCharsets.UTF_8))
        .collect();
    // Print the actual element count; the deprecated toArray() printed the list itself.
    System.out.println("size of rdd" + records.size());
    System.out.println(records);
});

// NOTE(review): no batch runs until the context is started (ctx.start() followed by
// ctx.awaitTermination()); that call is not visible in this excerpt — confirm it exists.