我是hadoop的新手。我按照maichel-noll教程在单个节点中设置了hadoop。我尝试运行WordCount程序。这是我使用的代码:
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "WordCount");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
这是我在尝试运行时得到的结果。
hduser@aswin-HP-Pavilion-15-Notebook-PC:/usr/local/hadoop$ bin/hadoop jar wc.jar WordCount /home/hduser/gutenberg /home/hduser/gutenberg-output/sample.txt
Exception in thread "main" java.lang.NoClassDefFoundError: WordCount (wrong name: org/myorg/WordCount)
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:788)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:447)
at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:411)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.hadoop.util.RunJar.main(RunJar.java:205)
任何人都可以帮助我。 我的班级路径:
hduser@aswin-HP-Pavilion-15-Notebook-PC:/usr/local/hadoop$ hadoop classpath
/usr/local/hadoop/etc/hadoop:/usr/local/hadoop/share/hadoop/common/lib/*:/usr/local/hadoop/share/hadoop/common/*:/usr/local/hadoop/share/hadoop/hdfs:/usr/local/hadoop/share/hadoop/hdfs/lib/*:/usr/local/hadoop/share/hadoop/hdfs/*:/usr/local/hadoop/share/hadoop/yarn/lib/*:/usr/local/hadoop/share/hadoop/yarn/*:/usr/local/hadoop/share/hadoop/mapreduce/lib/*:/usr/local/hadoop/share/hadoop/mapreduce/*:/usr/lib/jvm/java-7-openjdk-i386/lib/tools.jar:/usr/local/hadoop/contrib/capacity-scheduler/*.jar
答案 0 :(得分:5)
试试这个,
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
public class WordCount {
public static class Map extends MapReduceBase implements
Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
value.set(tokenizer.nextToken());
output.collect(value, new IntWritable(1));
}
}
}
public static class Reduce extends MapReduceBase implements
Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterator<IntWritable> values,
OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
int sum = 0;
while (values.hasNext()) {
sum += values.next().get();
}
output.collect(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(WordCount.class);
conf.setJobName("wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
}
然后运行命令
bin/hadoop jar WordCount.jar WordCount /hdfs_Input_filename /output_filename
如果您的代码在特定包中,那么您必须提及包含类名
的包名称bin/hadoop jar WordCount.jar PakageName.WordCount /hdfs_Input_filename /output_filename
答案 1 :(得分:1)
包装有问题。 你应该试试这个:
jar cf wc.jar WordCount*.class
注意有一个符号'*'
答案 2 :(得分:0)
您正在课堂上使用套餐。所以你的命令应该是
bin/hadoop jar wc.jar org.myorg.WordCount /home/hduser/gutenberg /home/hduser/gutenberg-output/sample.txt
答案 3 :(得分:0)
这可能听起来很疯狂。我在代码中添加了package org.myorg;
并再次编译。我将类文件放在org / myorg文件夹中,并使用它们创建了jar文件。然后我使用jar wc.jar org.myorg.WordCount
命令运行并成功执行。如果有人能解释我它是如何运行的话会很好:D。无论如何,非常感谢帮助我们。
答案 4 :(得分:0)
尝试在jar文件中明确包含嵌套类(即TokenizerMapper
和IntSumReducer
)。我是这样做的:
jar cvf WordCount.jar WordCount.class WordCount\$TokenizerMapper.class WordCount\$IntSumReducer.class
答案 5 :(得分:0)
基肖尔的答案,让我朝着正确的方向前进, 如果有可能我想证实这一点,请报告我对使用Java代码进行稀疏矩阵混合的实验:
1)源代码(从https://github.com/marufaytekin/MatrixMultiply/tree/master/src/main/java/com/lendap/hadoop下载),并保存在/ home / hduser / playground / src / matrixMult
中2)下载数据集(来自https://github.com/marufaytekin/MatrixMultiply/tree/master/input的矩阵M和N,然后保存在HDFS中,其路径如下:/ user / hduser / inMatrix
3)使用hadoop类进行编译,并在Playground / classes5中创建Java类: javac -classpath $ HADOOP_HOME / share / hadoop / common / lib / activation-1.1.jar:$ HADOOP_HOME / share / hadoop / common / hadoop-common-2.7.1.jar:/usr/hadoop/hadoop-2.7.1/ share / hadoop / mapreduce / * -d操场/ classes5操场/ src / matrixMult / *
4)使用以下命令创建jar文件MatrixMultiply.jar: jar -cvf游乐场/MatrixMultiply.jar -C游乐场/ classes5 /。
5)hadoop mapReduce命令(在$ HADOOP_HOME路径中,在我的情况下为/usr/hadoop/hadoop-2.7.1$ hadoop jar /home/hduser/playground/MatrixMultiply.jar com.lendap.hadoop.MatrixMultiply / user / hduser / inMatrix / outputMatrix
6)在我的4个节点群集上正确执行mapreduce作业。这是最终输出的一部分:
0,375,890.0 0,376,1005.0 0,377,1377.0 0,378,604.0 0,379,924.0 0,38,476.0 0,380,621.0 0,381,730.0
990,225,542.0 990,226,639.0 990,227,466.0 990,228,406.0 990,229,343.0 990,23,397.0 990,230,794.0