Error when executing a jar file on a Hadoop multi-node cluster

Date: 2017-02-28 15:39:47

Tags: java hadoop mapreduce

I am trying to run the Eleuni_max application on a Hadoop multi-node cluster, but I get an error when running this command:

$HADOOP_HOME/bin/hadoop jar units.jar hadoop.ProcessUnits input1_dir output_dir

Here is the source code:

package hadoop; 

import java.util.*; 

import java.io.IOException; 

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.conf.*; 
import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 
import org.apache.hadoop.util.*;



public class ProcessUnits
{
   // Mapper class
   public static class E_EMapper extends MapReduceBase implements
         Mapper<LongWritable, /* Input key type    */
                Text,         /* Input value type  */
                Text,         /* Output key type   */
                IntWritable>  /* Output value type */
   {
      // Map function
      public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException
      {
         String line = value.toString();
         String lasttoken = null;
         StringTokenizer s = new StringTokenizer(line, "\t");
         String year = s.nextToken();

         while (s.hasMoreTokens())
         {
            lasttoken = s.nextToken();
         }

         int avgprice = Integer.parseInt(lasttoken);
         output.collect(new Text(year), new IntWritable(avgprice));
      }
   }

   // Reducer class
   public static class E_EReduce extends MapReduceBase implements
         Reducer<Text, IntWritable, Text, IntWritable>
   {
      // Reduce function
      public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException
      {
         int maxavg = 30;
         int val = Integer.MIN_VALUE;

         while (values.hasNext())
         {
            if ((val = values.next().get()) > maxavg)
            {
               output.collect(key, new IntWritable(val));
            }
         }
      }
   }

   // Main function
   public static void main(String args[]) throws Exception
   {
      JobConf conf = new JobConf(ProcessUnits.class);

      conf.setJobName("max_eletricityunits");
      conf.setOutputKeyClass(Text.class);
      conf.setOutputValueClass(IntWritable.class);
      conf.setMapperClass(E_EMapper.class);
      conf.setCombinerClass(E_EReduce.class);
      conf.setReducerClass(E_EReduce.class);
      conf.setInputFormat(TextInputFormat.class);
      conf.setOutputFormat(TextOutputFormat.class);

      FileInputFormat.setInputPaths(conf, new Path(args[0]));
      FileOutputFormat.setOutputPath(conf, new Path(args[1]));

      JobClient.runJob(conf);
   }
}
The error:
hadoop@hadoop:~$ $HADOOP_HOME/bin/hadoop jar units.jar hadoop.ProcessUnits input1_dir output_dir 
Warning: $HADOOP_HOME is deprecated.

Exception in thread "main" java.lang.ClassNotFoundException: hadoop.ProcessUnits
    at java.net.URLClassLoader$1.run(URLClassLoader.java:359)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:348)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:347)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:278)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:153)

1 Answer:

Answer 0 (score: 0)

I copied your code and was able to run the job successfully, so please check how you are creating the jar. The class files may not have been included when you packaged it.
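
For example, you can list the jar's contents to confirm the compiled classes were actually packaged (a quick check, assuming units.jar is in your current directory):

  # hadoop/ProcessUnits.class and the inner classes
  # (hadoop/ProcessUnits$E_EMapper.class, hadoop/ProcessUnits$E_EReduce.class)
  # should all appear in this listing
  jar tf units.jar

If hadoop/ProcessUnits.class is missing from that listing, the jar was built without the class files, which would explain the ClassNotFoundException.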

Could you please try the following steps? Using them, I was able to run your code without problems (a consolidated sketch of these commands follows the list):

  1. echo $JAVA_HOME (make sure JAVA_HOME is set).
  2. export HADOOP_CLASSPATH=$JAVA_HOME/lib/tools.jar
  3. Create the package directory 'hadoop' (mkdir hadoop). (Keep the directory structure matching the package name 'hadoop' so the class files end up in the right place.)
  4. Copy 'ProcessUnits.java' into the hadoop folder.
  5. hadoop com.sun.tools.javac.Main hadoop/ProcessUnits.java
  6. jar -cf units.jar hadoop/ProcessUnits*.class
  7. Your jar is now created. You can run it with: hadoop jar units.jar hadoop.ProcessUnits inputDir outputDir
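
Put together, a minimal build-and-run sketch of the steps above (assuming ProcessUnits.java sits in the current directory and input1_dir already exists in HDFS, as in the question):

  export HADOOP_CLASSPATH=$JAVA_HOME/lib/tools.jar          # let hadoop find the Java compiler
  mkdir -p hadoop                                           # directory must match the package name
  cp ProcessUnits.java hadoop/
  hadoop com.sun.tools.javac.Main hadoop/ProcessUnits.java  # compiles to hadoop/*.class
  jar -cf units.jar hadoop/ProcessUnits*.class              # package all the class files
  hadoop jar units.jar hadoop.ProcessUnits input1_dir output_dir

Note that the class name passed to "hadoop jar" is the fully qualified Java class name (hadoop.ProcessUnits), not a file path.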