我目前正在尝试使用maven打包我的项目,但是,通过简单的RDD操作,maven测试失败(引发异常)。
我正在使用Spark 2.3.0,Scala 2.11.8,JDK 8。
代码:
class A extends FunSuite with DataFrameSuiteBase with Matchers with Logging
with BeforeAndAfter with MockFactory {
val myList= List(
Array(22, 34, 56, 785),
Array(21,24,34,534,563),
Array(2, 34, 86,345),
Array(10, 34, 98),
Array(101, 456, 545,678,763),
Array(1, 3, 5,23),
Array(32, 322, 523,623),
Array(11, 22, 58,223),
Array(14, 545),
Array(22)
)
val myRDD: RDD[Array[Int]] = sc.parallelize(myList)
test("rddTest"){
val byValueCount = myRDD.flatMap(x => x).countByValue() //doesn't work
val rddCount = myRDD.count() //works
...}}
第二个计数工作正常,但是第一个计数(按值)抛出此异常:
- rddTest *** FAILED ***
java.lang.IllegalArgumentException:
at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
at org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:46)
at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:449)
at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:432)
at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:103)
at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:103)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:230)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
at scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:103)
at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
at org.apache.spark.util.FieldAccessFinder$$anon$3.visitMethodInsn(ClosureCleaner.scala:432)
at org.apache.xbean.asm5.ClassReader.a(Unknown Source)
at org.apache.xbean.asm5.ClassReader.b(Unknown Source)
at org.apache.xbean.asm5.ClassReader.accept(Unknown Source)
at org.apache.xbean.asm5.ClassReader.accept(Unknown Source)
at org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:262)
at org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:261)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:261)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:159)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2292)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2066)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2092)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:939)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.collect(RDD.scala:938)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$countByKey$1.apply(PairRDDFunctions.scala:370)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$countByKey$1.apply(PairRDDFunctions.scala:370)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.countByKey(PairRDDFunctions.scala:369)
at org.apache.spark.rdd.RDD$$anonfun$countByValue$1.apply(RDD.scala:1208)
at org.apache.spark.rdd.RDD$$anonfun$countByValue$1.apply(RDD.scala:1208)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.countByValue(RDD.scala:1207)
使用IDE(intelliJ)进行测试就可以了。
编辑:完整POM :
<?xml version='1.0' encoding='UTF-8'?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0">
<modelVersion>4.0.0</modelVersion>
<groupId>project</groupId>
<artifactId>project_2.11</artifactId>
<packaging>jar</packaging>
<description>project</description>
<version>0.0.1-SNAPSHOT</version>
<name>project</name>
<organization>
<name>project</name>
</organization>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<scala.version>2.11.8</scala.version>
<scala.compat.version>2.11</scala.compat.version>
<spark.cdh.version>${spark.version}.cloudera1</spark.cdh.version>
<mockito.version>1.9.5</mockito.version>
<spark.version>2.3.0</spark.version>
<java.version>1.8</java.version>
</properties>
<!-- repository to add org.apache.spark -->
<repositories>
<repository>
<id>cloudera-repo-releases</id>
<url>https://repository.cloudera.com/artifactory/repo/</url>
</repository>
</repositories>
<build>
<sourceDirectory>src/main/scala-2.10</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<resources>
<!-- regular resource processsing for everything except logback.xml -->
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>compile</id>
<goals>
<goal>compile</goal>
</goals>
<phase>compile</phase>
</execution>
<execution>
<id>test-compile</id>
<goals>
<goal>testCompile</goal>
</goals>
<phase>test-compile</phase>
</execution>
<execution>
<id>process-test</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
<execution>
<id>process</id>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.7</version>
<configuration>
<skipTests>true</skipTests>
<trimStackTrace>false</trimStackTrace>
</configuration>
</plugin>
<!-- enable scalatest -->
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<configuration>
<parallel>false</parallel>
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>WDF TestSuite.txt</filereports>
<htmlreporters>${project.build.directory}/html/scalatest</htmlreporters>
<testFailureIgnore>false</testFailureIgnore>
<!--<argLine>-Xmx2048m</argLine>-->
<argLine>-Xmx2048m -Dsun.io.serialization.extendedDebugInfo=true</argLine>
<stdout>F</stdout>
</configuration>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.8</version>
</dependency>
<dependency>
<groupId>org.pegdown</groupId>
<artifactId>pegdown</artifactId>
<version>1.4.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>project-utils</groupId>
<artifactId>project-utils_2.11</artifactId>
<version>0.0.1-SNAPSHOT</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>project-common</groupId>
<artifactId>project-common_2.11</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.scoverage</groupId>
<artifactId>scalac-scoverage-runtime_2.11</artifactId>
<version>1.1.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scoverage</groupId>
<artifactId>scalac-scoverage-plugin_2.11</artifactId>
<version>1.1.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.11</artifactId>
<version>${spark.version}</version>
<!--<scope>runtime</scope>-->
</dependency>
<dependency>
<groupId>com.holdenkarau</groupId>
<artifactId>spark-testing-base_2.11</artifactId>
<version>${spark.version}_0.9.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.4</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalaz</groupId>
<artifactId>scalaz-core_2.11</artifactId>
<version>7.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.11</artifactId>
<version>3.0.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalamock</groupId>
<artifactId>scalamock_2.11</artifactId>
<version>4.1.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalamock</groupId>
<artifactId>scalamock-scalatest-support_2.11</artifactId>
<version>3.6.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.github.fommil.netlib</groupId>
<artifactId>all</artifactId>
<version>1.1.2</version>
<type>pom</type>
</dependency>
</dependencies>
</project>
在线答案适用于使用Java 9+的用户-我正在使用Java 8。 先前的解决方案是这样的:java.lang.IllegalArgumentException at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source) with Java 10
但是,这不是我的情况的解决方案。 另外,这是一个非常突出的问题,绝不是重复的。
非常感谢!
答案 0 :(得分:1)
您的代码在我的机器上(Spark 2.3.0或Spark 2.3.1)工作正常。我认为依赖关系有些混乱。请提供完整的pom.xml。
答案 1 :(得分:0)
因此问题就像ALincoln所怀疑的那样导致依赖关系冲突。同一项目中的其他一些模块(即使它们不相关)与此模块存在冲突。 显然,root pom与不相关的模块建立了连接,最终结果是此错误。谢谢大家!