Team, when I run a program on the Spark cluster to save an RDD to Phoenix, it always reports the following error:
17/06/12 09:35:46 ERROR kerberos.SaveAsPhoenixApp$: save to phoenix error :java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found
java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2199)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getOutputFormatClass(JobContextImpl.java:232)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1074)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1074)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1074)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:994)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:985)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:985)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:985)
at org.apache.phoenix.spark.ProductRDDFunctions.saveToPhoenix(ProductRDDFunctions.scala:51)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp$.saveRDDToPhoenixSIMPLELOG(SaveAsPhoenixApp.scala:144)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp$.saveAsphoenixProcess(SaveAsPhoenixApp.scala:93)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp$.main(SaveAsPhoenixApp.scala:156)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp.main(SaveAsPhoenixApp.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2105)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2197)
... 29 more
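For reference, the save in my app goes through phoenix-spark's saveToPhoenix. Below is a minimal sketch of that code path (the table name, columns, and ZooKeeper address are placeholders, not my real values); saveToPhoenix wires org.apache.phoenix.mapreduce.PhoenixOutputFormat into saveAsNewAPIHadoopFile, which is exactly the class lookup that fails above:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.phoenix.spark._ // adds saveToPhoenix to RDDs of tuples

object SaveToPhoenixSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("save-to-phoenix"))
    // saveToPhoenix configures PhoenixOutputFormat on a Hadoop job and then
    // calls saveAsNewAPIHadoopFile -- the frames visible in the trace above.
    sc.parallelize(Seq((1L, "one"), (2L, "two")))
      .saveToPhoenix("TEST_TABLE", Seq("ID", "NAME"), zkUrl = Some("zk-host:2181"))
    sc.stop()
  }
}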
My submit script is as follows:
spark-submit \
--master yarn-client \
--driver-memory 2g \
--num-executors 3 \
--driver-cores 2 \
--executor-cores 2 \
--executor-memory 2g \
--class com.yjf.phoenix.kerberos.SaveAsPhoenixApp \
--principal log2hadoop \
--keytab /home/spark/conf/log2hadoop_bgp.keytab \
/home/spark/lib/test-phonenix.jar yarn-client /user/log2hadoop/data/phoenix.txt >> ./logs/test-phoenix-$(date +%Y-%m_%d).log 2>&1 &
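Since the maven-assembly-plugin in the pom.xml below is commented out, test-phonenix.jar is a thin jar, so the Phoenix classes have to reach the driver and executors some other way. One approach I believe would work is to pass the Phoenix jars explicitly at submit time with --jars; this is only a sketch, and the parcel paths are assumptions based on my install:

spark-submit \
--master yarn-client \
--jars /opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-core-4.7.0-clabs-phoenix1.3.0.jar,/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-spark-4.7.0-clabs-phoenix1.3.0.jar \
--class com.yjf.phoenix.kerberos.SaveAsPhoenixApp \
/home/spark/lib/test-phonenix.jar yarn-client /user/log2hadoop/data/phoenix.txt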
And the contents of the pom.xml file are as follows:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.yjf</groupId>
<artifactId>test-phonenix</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<!--add maven release-->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
<encoding>UTF-8</encoding>
<!--scala version-->
<scala.version>2.10.5</scala.version>
<jackson.version>2.3.0</jackson.version>
<!--cdh-spark-->
<spark.cdh.version>1.6.0-cdh5.8.0</spark.cdh.version>
<!--parquet.cdh.version-->
<parquet.cdh.version>1.8.1-cdh5.8.0-SNAPSHOT</parquet.cdh.version>
<!--cdh-hadoop-->
<hadoop.cdh.version>2.6.0-cdh5.8.0</hadoop.cdh.version>
<!--cdh-hbase-->
<cdh.hbase.version>1.2.0-cdh5.8.0</cdh.hbase.version>
<!--phoenix version: must match the Phoenix version installed on CDH; the HBase version may be lower-->
<phoenix.cdh.version>4.7.0-HBase-0.98</phoenix.cdh.version>
</properties>
<repositories>
<repository>
<id>scala-tools.org</id>
<name>Scala-Tools Maven2 Repository</name>
<url>http://scala-tools.org/repo-releases</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>mvnrepository</id>
<url>https://mvnrepository.com/artifact/</url>
</repository>
<repository>
<id>mvnrespositorycentral</id>
<url>http://central.maven.org/maven2/</url>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>scala-tools.org</id>
<name>Scala-Tools Maven2 Repository</name>
<url>http://scala-tools.org/repo-releases</url>
</pluginRepository>
</pluginRepositories>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.phoenix/phoenix -->
<!--phoenix hbase-->
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-core</artifactId>
<version>${phoenix.cdh.version}</version>
</dependency>
<!--phoenix spark -->
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-spark</artifactId>
<version>${phoenix.cdh.version}</version>
</dependency>
<!--spark sql-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_2.10</artifactId>
<version>${spark.cdh.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>${spark.cdh.version}</version>
</dependency>
<!--apache parquet-->
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>${parquet.cdh.version}</version>
</dependency>
<!--hadoop-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.cdh.version}</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.cdh.version}</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.cdh.version}</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<!--pull in the local Spark assembly jar matching the CDH version-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-assembly_2.10</artifactId>
<version>${spark.cdh.version}</version>
<scope>system</scope>
<!--<systemPath>D:/ngaa/workspace/bigdata/spark/lib/spark-assembly-1.5.0-cdh5.5.0-hadoop2.6.0-cdh5.5.0.jar</systemPath>-->
<systemPath>/opt/jars/spark-assembly-1.6.0-cdh5.8.0-hadoop2.6.0-cdh5.8.0.jar</systemPath>
</dependency>
</dependencies>
<!--maven packaging-->
<build>
<finalName>test-phonenix</finalName>
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
<args>
<arg>-target:jvm-1.7</arg>
</args>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<configuration>
<downloadSources>true</downloadSources>
<buildcommands>
<buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
</buildcommands>
<additionalProjectnatures>
<projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
</additionalProjectnatures>
<classpathContainers>
<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
<classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
</classpathContainers>
</configuration>
</plugin>
<!-- <plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass></mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>-->
</plugins>
</build>
<reporting>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</reporting>
</project>
I am using CDH 5.8.0, and the Phoenix parcel is CLABS_PHOENIX-4.7.0-1.clabs_phoenix1.3.0.p0.000-el5.parcel. I would appreciate your help, thank you very much! (Update: I have already solved it!)
Answer 0 (score: 0)
Answering on behalf of @jifei yang: he added all of the Phoenix library jar paths to /etc/spark/conf/classpath.txt:
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-core-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-spark-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/calcite-avatica-1.6.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/calcite-avatica-server-1.6.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/commons-csv-1.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/hbase-prefix-tree-1.2.0-cdh5.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/hbase-procedure-1.2.0-cdh5.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-flume-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-pherf-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-pherf-4.7.0-clabs-phoenix1.3.0-minimal.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-pig-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-server-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-server-4.7.0-clabs-phoenix1.3.0-runnable.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-server-client-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-api-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-common-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-core-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-discovery-api-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-discovery-core-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-zookeeper-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/tephra-api-0.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/tephra-core-0.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/tephra-hbase-compat-0.98-0.7.0.jar
In addition to the above steps, he also applied the same change on the other nodes of the Spark cluster.
This solved his problem.
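A quick way to sanity-check a fix like this is to confirm that the missing class really is inside the jar that classpath.txt points at; a sketch, using the phoenix-core path from the list above:

unzip -l /opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-core-4.7.0-clabs-phoenix1.3.0.jar | grep PhoenixOutputFormat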