I am writing a Word Count program on Spark using Java. The code compiles without errors, but when I run it, it throws a NullPointerException at reduceByKey. All of the steps before that return the correct RDDs.
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

// Enclosing class (the original snippet omits the class declaration; the name is assumed).
public class WordCount {
    public static void main(String args[]) {
        // Local mode, compression disabled, Kryo serialization enabled.
        SparkConf conf = new SparkConf().setMaster("local").setAppName("app1")
                .set("spark.rdd.compress", "false")
                .set("spark.shuffle.compress", "false")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Read the input file as a single-partition RDD of lines.
        JavaRDD<String> input_rdd = sc.textFile("C:/Users/skamal/Desktop/Spark_Input/file.txt", 1);

        RDDCreation obj = new RDDCreation();
        RedByKey obj1 = new RedByKey();
        FlatMap obj2 = new FlatMap();

        // Split each line into words.
        JavaRDD<String> word_rdd = input_rdd.flatMap(obj2);
        // Map each word to a (word, 1) pair.
        JavaPairRDD<String, Integer> pair_RDD = word_rdd.mapToPair(obj);
        // Sum the counts per word using the RedByKey function class.
        JavaPairRDD<String, Integer> red_RDD = pair_RDD.reduceByKey(obj1);
        // The same reduction again, with an inline anonymous Function2.
        JavaPairRDD<String, Integer> a = pair_RDD.reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                    @Override
                    public Integer call(Integer x, Integer y) {
                        return x + y;
                    }
                });

        JavaRDD<String> key_list = pair_RDD.keys();
        JavaRDD<Integer> values_list = pair_RDD.values();

        System.out.println("The data from the input RDD is..." + input_rdd.collect());
        System.out.println("************************");
        System.out.println("Pair RDD data is.." + pair_RDD.collect());
        // System.out.println("Keys from the RDD are.." + key_list.collect());
        // System.out.println("Values from the RDD are.." + values_list.collect());
        System.out.println("The data from flatMap.." + word_rdd.collect());
        System.out.println("Reduce by key RDD count is.." + a.count()); // the NullPointerException is thrown here
    }
}
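The three helper classes (FlatMap, RDDCreation, RedByKey) are not shown above. For anyone trying to reproduce this, here is a minimal sketch of what they presumably look like, inferred from how they are used in main and assuming the Spark 1.x Java API (where flatMap expects an Iterable); the method bodies are my guesses, not the actual code.

import java.util.Arrays;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

// Splits a line into words on whitespace (assumed implementation).
class FlatMap implements FlatMapFunction<String, String> {
    @Override
    public Iterable<String> call(String line) {
        return Arrays.asList(line.split(" "));
    }
}

// Maps each word to a (word, 1) pair (assumed implementation).
class RDDCreation implements PairFunction<String, String, Integer> {
    @Override
    public Tuple2<String, Integer> call(String word) {
        return new Tuple2<String, Integer>(word, 1);
    }
}

// Sums two partial counts for the same key (assumed implementation).
class RedByKey implements Function2<Integer, Integer, Integer> {
    @Override
    public Integer call(Integer count1, Integer count2) {
        return count1 + count2;
    }
}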