Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found

Date: 2017-06-12 01:41:59

Tags: scala apache-spark phoenix

Hi team. When I run my program on a Spark cluster to save an RDD to Phoenix, it always fails with this error:

17/06/12 09:35:46 ERROR kerberos.SaveAsPhoenixApp$: save to phoenix error :java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found
java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2199)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getOutputFormatClass(JobContextImpl.java:232)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1074)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1074)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1074)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:994)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:985)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:985)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:985)
at org.apache.phoenix.spark.ProductRDDFunctions.saveToPhoenix(ProductRDDFunctions.scala:51)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp$.saveRDDToPhoenixSIMPLELOG(SaveAsPhoenixApp.scala:144)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp$.saveAsphoenixProcess(SaveAsPhoenixApp.scala:93)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp$.main(SaveAsPhoenixApp.scala:156)
at com.yjf.phoenix.kerberos.SaveAsPhoenixApp.main(SaveAsPhoenixApp.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: Class org.apache.phoenix.mapreduce.PhoenixOutputFormat not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2105)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2197)
... 29 more
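
For reference, the code path in the trace (ProductRDDFunctions.saveToPhoenix, which delegates to saveAsNewAPIHadoopFile) corresponds to a call like the following. This is only a minimal sketch: the table OUTPUT_TEST, its columns, and the ZooKeeper quorum are placeholders, not the actual code from SaveAsPhoenixApp.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.phoenix.spark._ // adds the implicit saveToPhoenix to RDDs of tuples

object SaveToPhoenixSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("SaveAsPhoenixApp"))
    // Hypothetical rows and schema; substitute the real table and columns.
    val rows = sc.parallelize(Seq((1L, "a"), (2L, "b")))
    // saveToPhoenix configures a Hadoop job that writes through
    // org.apache.phoenix.mapreduce.PhoenixOutputFormat, so phoenix-core must be
    // resolvable on the driver and executor classpaths at runtime.
    rows.saveToPhoenix("OUTPUT_TEST", Seq("ID", "COL1"), zkUrl = Some("zk-host:2181"))
    sc.stop()
  }
}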

My submit script is as follows:

spark-submit \
--master yarn-client  \
--driver-memory 2g    \
--num-executors 3     \
--driver-cores 2      \
--executor-cores 2    \
--executor-memory 2g  \
--class com.yjf.phoenix.kerberos.SaveAsPhoenixApp \
--principal log2hadoop              \
--keytab   /home/spark/conf/log2hadoop_bgp.keytab    \
/home/spark/lib/test-phonenix.jar yarn-client  /user/log2hadoop/data/phoenix.txt >> ./logs/test-phoenix-$(date +%Y-%m_%d).log 2>&1 &
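
Note that nothing in this command puts the Phoenix jars on the application classpath: the assembly plugin in the pom.xml below is commented out, so test-phonenix.jar contains only the project's own classes. One way to address exactly this ClassNotFoundException (a sketch, assuming the CLABS parcel paths listed in the answer below) is to pass the Phoenix client jars explicitly with --jars, which in yarn-client mode adds them to both the driver and executor classpaths:

spark-submit \
--master yarn-client \
--jars /opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-core-4.7.0-clabs-phoenix1.3.0.jar,/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-spark-4.7.0-clabs-phoenix1.3.0.jar \
--class com.yjf.phoenix.kerberos.SaveAsPhoenixApp \
... (remaining options as above)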

And the contents of the pom.xml file are as follows:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.yjf</groupId>
    <artifactId>test-phonenix</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <!--add  maven release-->
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <!--scala version-->
        <scala.version>2.10.5</scala.version>
        <jackson.version>2.3.0</jackson.version>
        <!--cdh-spark-->
        <spark.cdh.version>1.6.0-cdh5.8.0</spark.cdh.version>
        <!--parquet.cdh.version-->
        <parquet.cdh.version>1.8.1-cdh5.8.0-SNAPSHOT</parquet.cdh.version>
        <!--cdh-hadoop-->
        <hadoop.cdh.version>2.6.0-cdh5.8.0</hadoop.cdh.version>
        <!--cdh-hbase-->
        <cdh.hbase.version>1.2.0-cdh5.8.0</cdh.hbase.version>
        <!--phoenix version: must match the Phoenix version installed with CDH; the HBase version may be lower-->
        <phoenix.cdh.version>4.7.0-HBase-0.98</phoenix.cdh.version>
    </properties>
    <repositories>
        <repository>
            <id>scala-tools.org</id>
            <name>Scala-Tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </repository>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>mvnrepository</id>
            <url>https://mvnrepository.com/artifact/</url>
        </repository>
        <!---->
        <repository>
            <id>mvnrespositorycentral</id>
            <url>http://central.maven.org/maven2/</url>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>scala-tools.org</id>
            <name>Scala-Tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </pluginRepository>
    </pluginRepositories>
    <dependencies><!-- https://mvnrepository.com/artifact/org.apache.phoenix/phoenix -->
        <!--phoenix  hbase-->
        <dependency>
            <groupId>org.apache.phoenix</groupId>
            <artifactId>phoenix-core</artifactId>
            <version>${phoenix.cdh.version}</version>
        </dependency>
        <!--phoenix spark -->
        <dependency>
            <groupId>org.apache.phoenix</groupId>
            <artifactId>phoenix-spark</artifactId>
            <version>${phoenix.cdh.version}</version>
        </dependency>
        <!--spark sql-->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-catalyst_2.10</artifactId>
            <version>${spark.cdh.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>${spark.cdh.version}</version>
        </dependency>
        <!--apache parquet-->
        <dependency>
            <groupId>org.apache.parquet</groupId>
            <artifactId>parquet-hadoop</artifactId>
            <version>${parquet.cdh.version}</version>
        </dependency>
        <!--hadoop-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.cdh.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.cdh.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.cdh.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!--Spark assembly jar from the local CDH installation, matching the CDH version-->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-assembly_2.10</artifactId>
            <version>${spark.cdh.version}</version>
            <scope>system</scope>
            <!--<systemPath>D:/ngaa/workspace/bigdata/spark/lib/spark-assembly-1.5.0-cdh5.5.0-hadoop2.6.0-cdh5.5.0.jar</systemPath>-->
            <systemPath>/opt/jars/spark-assembly-1.6.0-cdh5.8.0-hadoop2.6.0-cdh5.8.0.jar
            </systemPath>
        </dependency>
    </dependencies>
    <!--maven packaging-->
    <build>
        <finalName>test-phonenix</finalName>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                    <args>
                        <arg>-target:jvm-1.7</arg>
                    </args>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-eclipse-plugin</artifactId>
                <configuration>
                    <downloadSources>true</downloadSources>
                    <buildcommands>
                        <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
                    </buildcommands>
                    <additionalProjectnatures>
                        <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
                    </additionalProjectnatures>
                    <classpathContainers>
                        <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
                      <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
                    </classpathContainers>
                </configuration>
            </plugin>
          <!--  <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass></mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>-->
        </plugins>
    </build>
    <reporting>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
            </plugin>
        </plugins>
    </reporting>
</project>

My CDH version is 5.8.0, and the Phoenix parcel is CLABS_PHOENIX-4.7.0-1.clabs_phoenix1.3.0.p0.000-el5.parcel. Please help, and thank you very much! (Update: I have solved it!)

1 Answer:

Answer 0 (score: 0)

Posting this answer on behalf of @jifei yang.

He added the paths of all the Phoenix library jars to /etc/spark/conf/classpath.txt:

/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-core-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-spark-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/calcite-avatica-1.6.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/calcite-avatica-server-1.6.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/commons-csv-1.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/hbase-prefix-tree-1.2.0-cdh5.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/hbase-procedure-1.2.0-cdh5.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-flume-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-pherf-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-pherf-4.7.0-clabs-phoenix1.3.0-minimal.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-pig-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-server-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-server-4.7.0-clabs-phoenix1.3.0-runnable.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/phoenix-server-client-4.7.0-clabs-phoenix1.3.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-api-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-common-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-core-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-discovery-api-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-discovery-core-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/twill-zookeeper-0.6.0-incubating.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/tephra-api-0.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/tephra-core-0.7.0.jar
/opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/tephra-hbase-compat-0.98-0.7.0.jar
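
A sketch of how that might be scripted on each node (this appends every jar in the parcel's lib directory, a superset of the list above; on CDH, spark-env.sh typically reads classpath.txt when building the Spark driver and executor classpaths):

# run as root on each node; paths assume the standard CLABS parcel layout
for jar in /opt/cloudera/parcels/CLABS_PHOENIX/lib/phoenix/lib/*.jar; do
    echo "$jar" >> /etc/spark/conf/classpath.txt
done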

In addition to the steps above, he also distributed the jars to all nodes of the Spark cluster.

This solved his problem.