Question

我最近安装了一个单节点hadoop集群，并且我成功地从输入文件运行wordcount作业。现在我正在尝试使用maven运行相同的jar但我遇到了这个错误：

[WARNING] 
java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.codehaus.mojo.exec.ExecJavaMojo$1.run(ExecJavaMojo.java:293)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
at WordCount.main(WordCount.java:54)
... 6 more
[ERROR] Failed to execute goal org.codehaus.mojo:exec-maven-plugin:1.4.0:java
(default-cli) on project avddb-wordcount: An exception 
occured while executing the Java class. null: InvocationTargetException 
-> [Help 1]
[ERROR] 
[ERROR] To see the full stack trace of the errors, re-run Maven with
the -e switch.
[ERROR] Re-run Maven using the -X switch to enable full debug logging.
[ERROR] 
[ERROR] For more information about the errors and possible solutions,
please read the following articles:
[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoExecutionException

我在终端中通过以下命令运行它：

mvn compile
mvn package
mvn exec:java -Dexec.mainClass="WordCount" -Dexec.args="src/main/resources/input1.txt"

我的pom文件是：

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the avddb-wordcount project (Hadoop WordCount job). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Project coordinates. -->
    <groupId>com.ece.lab</groupId>
    <artifactId>avddb-wordcount</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- Hadoop libraries the WordCount class compiles against. -->
    <dependencies>
        <!-- NOTE(review): "provided" scope means this artifact is expected to be supplied
             by the runtime environment; confirm this is intended when launching the job
             through Maven rather than through the hadoop launcher. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
            <scope>provided</scope>
        </dependency>
        <!-- NOTE(review): this is version 1.2.1 while every other Hadoop artifact here is
             2.6.0; mixing the two release lines looks unintended, so verify it is needed. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Compile with Java 1.7 source and target levels. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.1</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

源代码是：

 import java.io.IOException;
 import java.util.StringTokenizer;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

public static class TokenizerMapper extends Mapper<Object, Text, Text, 
IntWritable>{

  private final static IntWritable one = new IntWritable(1);
  private Text word = new Text();

   public void map(Object key, Text value, Context context) throws 
  IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    context.write(word, one);
    }
  }
 }

 public static class IntSumReducer
   extends Reducer<Text,IntWritable,Text,IntWritable> {
 private IntWritable result = new IntWritable();

 public void reduce(Text key, Iterable<IntWritable> values, Context  
 context) throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable val : values) {
    sum += val.get();
   }
    result.set(sum);
    context.write(key, result);
  }
}

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
 }
 }

有什么想法吗？

Answer 1

您的wordcount应用程序中存在错误：

Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 at WordCount.main(WordCount.java:54)

Answer 2

您的数组大小可能为1，因此您将超过最后一个索引

因为你正在运行它

mvn exec:java -Dexec.mainClass="WordCount" -Dexec.args="src/main/resources/input1.txt"

你的args只有1个元素（输入路径），它位于索引[0]。您需要添加一个额外的参数来提供输出路径（或者在未指定输出路径时选择一个默认路径）。例如，可以这样处理args：

// The input path is mandatory; the output path falls back to a default when omitted.
if (args.length > 0) {
    FileInputFormat.addInputPath(job, new Path(args[0]));
} else {
    throw new IllegalArgumentException("Missing input path argument");
}
if (args.length > 1) {
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
} else {
    FileOutputFormat.setOutputPath(job, new Path("<Default output path here>"));
}

然后像这样运行它，例如

mvn exec:java -Dexec.mainClass="WordCount" -Dexec.args="'src/main/resources/input1.txt' 'src/main/output/output.txt'"

在访问数组元素之前先检查数组大小，以避免ArrayIndexOutOfBoundsException，这始终是一个好习惯。

hadoop maven Java类null：InvocationTargetException

2 个答案: