scala.collection.immutable.List exception when deploying a Spring Boot jar on a Spark standalone cluster

Time: 2018-11-30 05:27:13

Tags: java maven spring-boot apache-spark

When an executable jar developed with Spring Boot and packaged by Maven is deployed to the Spark cluster, the error "cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_" occurs. The same program runs successfully in the local Linux environment.

The pom.xml is:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
....

<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.12.RELEASE</version>
    <relativePath /> 
</parent>

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    <java.version>1.8</java.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
        <exclusions>
            <exclusion>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-logging</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-log4j2</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>2.4.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.12</artifactId>
        <version>2.4.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka_2.11</artifactId>
        <version>1.6.3</version>
    </dependency>
    <dependency>
        <groupId>com.thoughtworks.paranamer</groupId>
        <artifactId>paranamer</artifactId>
        <version>2.8</version>
    </dependency>
</dependencies>

<build>
    <finalName>spring-spark</finalName>
    <plugins>
        <plugin>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-maven-plugin</artifactId>
        </plugin>
    </plugins>

    <resources>
        <resource>
            <filtering>true</filtering>
            <directory>src/main/resources</directory>
            <excludes>
                <exclude>configuration/local/*</exclude>
                <exclude>configuration/dev/*</exclude>
            </excludes>
        </resource>
        <resource>
            <directory>src/main/resources/configuration/${env}</directory>
        </resource>
    </resources>
</build>

@SpringBootApplication
public class Application implements CommandLineRunner {

    @Autowired private SparkTestService service;

    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }

    @Override
    public void run(String... args) throws Exception {
        service.run();
    }
}

The Spark configuration is:

@Bean
@ConditionalOnMissingBean(SparkConf.class)
public SparkConf sparkConf() throws Exception {
    SparkConf conf = new SparkConf()
            .setAppName(appName)
            .setMaster("spark://192.168.43.20:7077");
    return conf;
}

@Bean
@ConditionalOnMissingBean(JavaSparkContext.class)
public JavaSparkContext javaSparkContext() throws Exception {
   return new JavaSparkContext(sparkConf());
}
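
For reference only, a minimal sketch of a variant of this bean that also ships the application jar to the executors via SparkConf.setJars; the jar path here is an assumption, taken to be the same /dkd/softwares/spring-spark.jar that is passed to spark-submit further down:

@Bean
@ConditionalOnMissingBean(SparkConf.class)
public SparkConf sparkConf() throws Exception {
    // Same master and app name as above, plus setJars(...) so that the
    // executors can fetch the application classes from this jar.
    // The path is assumed to match the jar given to spark-submit below.
    return new SparkConf()
            .setAppName(appName)
            .setMaster("spark://192.168.43.20:7077")
            .setJars(new String[] { "/dkd/softwares/spring-spark.jar" });
}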

The service is:

@Service
public class SparkTestService implements Serializable {
  private static final long serialVersionUID = 1L;
  private final static Logger logger = 
            LoggerFactory.getLogger(SparkTestService.class);
  private static final Pattern SPACE = Pattern.compile(" ");

  @Autowired
  private transient JavaSparkContext sc;

  public Map<String, Integer> run() {
    Map<String, Integer> result = new HashMap<>();
    JavaRDD<String> lines = sc.textFile("/softwares/test").cache();
    System.out.println(lines.count());

    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterator<String> call(String line) throws Exception {
            return Arrays.asList(line.split(" ")).iterator();
        }
    });

    JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> call(String word) throws Exception {
            return new Tuple2<String, Integer>(word, 1);
        }
    });

    JavaPairRDD<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Integer call(Integer v1, Integer v2) throws Exception {
            return v1 + v2;

        }
    });

    wordCounts.foreach(new VoidFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void call(Tuple2<String, Integer> wordCount) throws Exception {
            System.out.println(wordCount._1 + "------" + wordCount._2 + "times.");
        }
    });

    return result;
}

}
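
For readability only, the same word-count pipeline can be written with Java 8 lambdas instead of anonymous inner classes, since Spark's Java function interfaces are functional interfaces. runWithLambdas is a hypothetical method name added here for illustration; the behavior is intended to be identical to run() above:

public Map<String, Integer> runWithLambdas() {
    // Same input and caching as run().
    JavaRDD<String> lines = sc.textFile("/softwares/test").cache();
    System.out.println(lines.count());

    // Split into words, map each word to (word, 1), then sum the counts.
    JavaPairRDD<String, Integer> wordCounts = lines
            .flatMap(line -> Arrays.asList(SPACE.split(line)).iterator())
            .mapToPair(word -> new Tuple2<String, Integer>(word, 1))
            .reduceByKey((v1, v2) -> v1 + v2);

    // Print each (word, count) pair on the executors, as in run().
    wordCounts.foreach(wc -> System.out.println(wc._1 + "------" + wc._2 + " times."));
    return new HashMap<>();
}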

./spark-2.4.0-bin-hadoop2.7/bin/spark-submit --master spark://s0:7077 --driver-memory 600m  --executor-memory 600m --executor-cores 1  --class org.springframework.boot.loader.JarLauncher /dkd/softwares/spring-spark.jar

The exception is:

Caused by: java.lang.ClassCastException: cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD

0 Answers:

There are no answers yet.