I wrote a Spark application; the code is:
package com.demo;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by sdvdxl on 2016/3/14.
 */
public class SparkCalcDemo {
    private static final String HADOOP_URL = "hdfs://10.10.1.110:8020/";

    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("test").setMaster("local[1]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> textFile = sc.textFile(HADOOP_URL + "/xd/dataset1/2016/03/14/15/01", 1);

        // Each line is assumed to be a Base64-encoded JSON object wrapped in quotes:
        // strip the wrapper, decode, and emit the "name" and "random" fields.
        JavaRDD<String> words = textFile.flatMap(new FlatMapFunction<String, String>() {
            public Iterable<String> call(String s) {
                List<String> list = new ArrayList<String>();
                JSONObject jobj = JSON.parseObject(new String(
                        org.apache.commons.codec.binary.Base64.decodeBase64(s.substring(1, s.length() - 1))));
                list.add(jobj.getString("name"));
                list.add(jobj.getString("random"));
                return list;
            }
        });

        // Word count: map each value to (value, 1) and sum the counts per key.
        JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<String, Integer>(s, 1);
            }
        });

        JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) {
                return a + b;
            }
        });

        counts.foreach(tuple2 ->
                System.out.println(tuple2._1 + " : " + tuple2._2));
    }
}
And the pom contents are:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>kafka-demo</groupId>
    <artifactId>kafka-demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.7</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>1.6.1</version>
            <!--<exclusions>
                <exclusion>
                    <groupId>com.fasterxml.jackson.module</groupId>
                    <artifactId>jackson-module-scala_2.10</artifactId>
                </exclusion>
            </exclusions>-->
        </dependency>
        <!--<dependency>
            <groupId>com.fasterxml.jackson.module</groupId>
            <artifactId>jackson-module-scala_2.10</artifactId>
            <version>2.7.2</version>
        </dependency>-->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.10</version>
        </dependency>
        <dependency>
            <groupId>org.apache.camel</groupId>
            <artifactId>camel-base64</artifactId>
            <version>2.16.2</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <repositories>
        <repository>
            <id>oschina</id>
            <url>http://maven.oschina.net/content/groups/public</url>
        </repository>
        <repository>
            <id>mavenspring</id>
            <url>http://maven.springframework.org/release</url>
        </repository>
        <repository>
            <id>jcenter</id>
            <url>http://jcenter.bintray.com</url>
        </repository>
        <repository>
            <id>spring-milestones</id>
            <name>Spring Milestones</name>
            <url>http://repo.spring.io/milestone</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
        <repository>
            <id>spring-release</id>
            <name>Spring Releases</name>
            <url>http://repo.spring.io/libs-release</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
        <repository>
            <id>spring-snapshots</id>
            <name>Spring Snapshots</name>
            <url>http://repo.spring.io/snapshot</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
            <releases>
                <enabled>false</enabled>
            </releases>
        </repository>
    </repositories>
</project>
When I submit it with spark-submit from the shell, it runs fine. But when I submit it as a Spring XD job with

job create --name sparkAppDemo --definition "sparkapp --mainClass=com.demo.SparkCalcDemo --appJar=/home/spark/spark-app.jar --master=spark://localhost:7077" --deploy
job launch sparkAppDemo

there is no output, the job status in the Spring XD admin UI stays at "started", and it looks like something is wrong and the job hangs.
I've attached two screenshots here: job status
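For reference, the working spark-submit invocation has roughly the shape below; the exact command I ran isn't reproduced here, so the class name, jar path, and master are simply taken from the XD job definition above and may not match exactly:

spark-submit --class com.demo.SparkCalcDemo --master spark://localhost:7077 /home/spark/spark-app.jar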