import org.apache.spark.api.java.*;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function;

public class SimpleApp {
    public static void main(String[] args) {
        String logFile = "README.md"; // Should be some file on your system
        // Windows only: this directory must contain bin\winutils.exe
        System.setProperty("hadoop.home.dir", "C:\\hadooponwindows-master\\hadooponwindows-master");
        SparkConf conf = new SparkConf().setAppName("Simple Application").setMaster("spark://192.168.121.59:7077");
        // SparkConf conf = new SparkConf().setAppName("Simple Application").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> logData = sc.textFile(logFile).cache();
        // Count lines containing "spark"
        long numAs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) { return s.contains("spark"); }
        }).count();
        // Count lines containing "b"
        long numBs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) { return s.contains("b"); }
        }).count();
        System.out.println("Lines with \"spark\": " + numAs + ", lines with \"b\": " + numBs);
        sc.stop();
    }
}
The above code does not work with a remote master. It produces this warning: TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources.
However, if I run it with the local master (local[*]), it works fine.
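For reference, a minimal sketch of the two things this warning usually points at: the job asking for more memory/cores than any registered worker offers, or the workers being unable to connect back to the driver machine. The IP addresses and sizes below are placeholder assumptions, not values from the original post; spark.driver.host, spark.executor.memory, and spark.cores.max are standard Spark configuration keys.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class RemoteMasterConfSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("Simple Application")
                .setMaster("spark://192.168.121.59:7077")
                // Address of the driver machine as seen FROM the workers
                // (assumed example IP; executors must be able to reach it).
                .set("spark.driver.host", "192.168.121.50")
                // Ask for no more memory per executor than a worker has free.
                .set("spark.executor.memory", "512m")
                // Cap total cores so the request fits the cluster (standalone mode).
                .set("spark.cores.max", "2");
        JavaSparkContext sc = new JavaSparkContext(conf);
        System.out.println("Default parallelism: " + sc.defaultParallelism());
        sc.stop();
    }
}

If the cluster UI shows workers registered with free resources but the warning persists, the usual culprit is connectivity from worker to driver (firewall or a different subnet), which is why pinning spark.driver.host to an address routable from the workers is a common first check.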