批量加载HBase - NoClassDefFoundError(HBaseConfiguration)

时间:2017-01-16 02:30:22

标签: java maven hadoop mapreduce hbase

我的目标是在Cloudera集群(CDH 5.9)上运行一个简单的MapReduce作业,该作业从CSV文件读取数据,并以批量加载(bulk load)的方式写入HBase。

我使用的是SpringSource Tool Suite IDE。

执行该程序时,我遇到了以下异常:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/HBaseConfiguration
    at drivers.Driver.main(Driver.java:27)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.HBaseConfiguration
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    ... 7 more

这是我的驱动程序代码:

package drivers;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import mappers.HBaseMapper;

// args[0] = inputPath
// args[1] = outputPath
// args[2] = tableName

/**
 * Command-line driver that configures and runs the "NYCTaxi Bulk Import" MapReduce job.
 *
 * <p>The job reads text input with {@link TextInputFormat}, maps it with {@link HBaseMapper}
 * into {@link ImmutableBytesWritable} / {@link KeyValue} pairs and writes its output through
 * {@link HFileOutputFormat}, configured against an existing HBase table.
 *
 * <p>Expected arguments:
 * <ol>
 *   <li>{@code args[0]} - input path</li>
 *   <li>{@code args[1]} - output path</li>
 *   <li>{@code args[2]} - name of the target HBase table</li>
 * </ol>
 *
 * <p>NOTE(review): this class only runs the job. Nothing here moves the job output into the
 * table afterwards (for example through {@code LoadIncrementalHFiles}, which the file imports
 * but never uses) - confirm whether that step is performed elsewhere.
 */
public class Driver {
  /**
   * Builds the job from the command-line arguments, runs it and waits for it to finish.
   *
   * @param args input path, output path and table name, in that order; extra arguments are
   *     ignored
   * @throws IllegalArgumentException if fewer than three arguments are supplied
   * @throws Exception if the job cannot be configured or submitted, or the table cannot be
   *     opened or closed
   */
  public static void main(String[] args) throws Exception {
    // Fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
      throw new IllegalArgumentException(
          "Usage: Driver <inputPath> <outputPath> <tableName>");
    }

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String tableName = args[2];

    Configuration conf = HBaseConfiguration.create();
    // Published in the job configuration; presumably read back by HBaseMapper - confirm.
    conf.set("hbase.table.name", tableName);

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "NYCTaxi Bulk Import");
    job.setJarByClass(HBaseMapper.class);

    job.setMapperClass(HBaseMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    job.setInputFormatClass(TextInputFormat.class);

    boolean succeeded;
    HTable table = new HTable(conf, tableName);
    // try/finally rather than try-with-resources: the build's Java source level is not
    // visible from this file, and this form compiles at any level.
    try {
      // Lets HBase set up the output side of the job for the target table.
      HFileOutputFormat.configureIncrementalLoad(job, table);

      FileInputFormat.addInputPath(job, inputPath);
      FileOutputFormat.setOutputPath(job, outputPath);

      succeeded = job.waitForCompletion(true);
    } finally {
      // Always release the table's client resources, even when the job fails.
      table.close();
    }

    // Report a failed job to the calling shell instead of exiting with status 0.
    if (!succeeded) {
      System.exit(1);
    }
  }
}

这是我的pom.xml文件:

<!--
  Maven build descriptor for the InsertHBase2 MapReduce project.
  It declares neither a packaging type nor a build section, so Maven's defaults apply.
  NOTE(review): with those defaults the built jar presumably contains only this project's own
  classes and none of the dependencies listed below - confirm how the jar is built and launched.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <!-- Project coordinates. -->
  <groupId>MapReduce</groupId>
  <artifactId>InsertHBase2</artifactId>
  <version>1.0</version>
  <name>InsertHBase2</name>
<url>http://maven.apache.org</url>

    <properties>
        <!-- Source files are read as UTF-8. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Unit testing only (test scope). -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- Hadoop / YARN / MapReduce client libraries, all pinned to 2.6.0-cdh5.9.0. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0-cdh5.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>2.6.0-cdh5.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>2.6.0-cdh5.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0-cdh5.9.0</version>
        </dependency>

        <!-- HBase libraries, all pinned to 1.2.0-cdh5.9.0. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.0-cdh5.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.9.0</version>
        </dependency>   

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.2.0-cdh5.9.0</version>
        </dependency>       
    </dependencies>

    <!-- Extra repository; presumably where the cdh-suffixed versions above are resolved from. -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
</project>

当我在Maven依赖项中检查HBase jar时,我注意到HBaseConfiguration类存在。

enter image description here

我的Build路径中已经有了HBase jar

enter image description here

1 个答案:

答案 0 :(得分:2)

  

一般说明:关于Java中的NoClassDefFoundError——当Java运行时系统尝试加载某个类的定义、而该类定义已不可用时,就会抛出NoClassDefFoundError。也就是说,所需的类定义在编译时存在,但在运行时却找不到了。

HBaseConfiguration类应当位于下面这个依赖项中:

<dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.2.0-cdh5.9.0</version>
        </dependency> 

enter image description here

请确认运行时的类路径中也包含了这个jar。

下面的命令会把所有与HBase相关的jar加入HADOOP_CLASSPATH:

# Append the classpath reported by `hbase classpath` to the classpath used by `hadoop`.
export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$(hbase classpath)"
# Print the result to confirm that no HBase jars are missing.
# (The original trailing `// ...` text was not a shell comment, so echo printed it as well.)
echo "$HADOOP_CLASSPATH"

然后将这个Java程序打包成jar,并在Hadoop集群上按如下方式运行:

hadoop jar <jarfile> <mainclass>