WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class WordCount {
    public static void main(String[] args) throws Exception {
        System.setProperty("hadoop.home.dir", "C://hadoop//");
        // Create a Java Spark context.
        SparkConf sparkConf = new SparkConf().setAppName("wordCount").setMaster("spark://X.X.X.XX:XXXX");
        sparkConf.set("spark.cores.max", "6");
        sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sparkConf.set("spark.sql.tungsten.enabled", "true");
        sparkConf.set("spark.default.parallelism", "4");
        sparkConf.set("spark.app.id", "YourId");
        // Resource settings need the full "spark.*" property names; keys such as
        // "num-executors" or "executor-memory " (with a trailing space) are not
        // recognized by SparkConf and are silently ignored.
        sparkConf.set("spark.executor.instances", "3");
        sparkConf.set("spark.executor.memory", "2g");
        sparkConf.set("spark.executor.cores", "4");
        // Note: spark.driver.memory cannot be set here, because the driver JVM has
        // already started; pass it to spark-submit via --driver-memory instead.
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        // Load the input data.
        JavaRDD<String> inputFile = sc.textFile("D://sys1.txt");
        // Split each line into words.
        JavaRDD<String> wordsFromFile = inputFile.flatMap(content -> Arrays.asList(content.split(" ")).iterator());
        // Pair each word with 1, then sum the counts per word.
        JavaPairRDD<String, Integer> countData = wordsFromFile
                .mapToPair(t -> new Tuple2<>(t, 1))
                .reduceByKey((x, y) -> x + y);
        countData.saveAsTextFile("CountData");
        sc.close();
    }
}
I want to count the words in a file with Spark RDDs and save the result to a file.
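For reference, resource settings like these are normally passed on the command line rather than hard-coded. A minimal spark-submit sketch, assuming the job is packaged as wordcount.jar with main class WordCount (both names are placeholders); note that against a standalone master (a spark:// URL) the total core count is capped with --total-executor-cores, while --num-executors only applies on YARN:

spark-submit \
  --class WordCount \
  --master spark://X.X.X.XX:XXXX \
  --driver-memory 4g \
  --executor-memory 2g \
  --executor-cores 4 \
  --total-executor-cores 6 \
  wordcount.jar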