I'm new to Java and Hadoop; any help is appreciated. I'm trying to perform a join operation on two tables. ValueWrapper is a custom type that implements the Writable interface, and I put it in the stdRepartition package as well. I run everything from the command line. The process and result are shown below:
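(ValueWrapper isn't reproduced in full here; inferred from how it is used below, with Text flag and Text content fields and plain getters/setters, a minimal sketch of such a Writable wrapper might look like the following. The exact layout is a guess, not the actual class.)

package stdRepartition;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Hypothetical reconstruction: two Text fields serialized in a fixed order.
public class ValueWrapper implements Writable {
    private Text flag = new Text();     // "data" or "user"
    private Text content = new Text();  // the payload for that record type

    public Text getFlag() { return flag; }
    public Text getContent() { return content; }
    public void setFlag(Text flag) { this.flag = flag; }
    public void setContent(Text content) { this.content = content; }

    @Override
    public void write(DataOutput out) throws IOException {
        flag.write(out);
        content.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        flag.readFields(in);
        content.readFields(in);
    }
}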
Result:
javac StdRepartition.java ValueWrapper.java
jar -cvf StdRepartition.jar ./*.class
added manifest
adding: StdRepartition.class
adding: StdRepartition$DataMapper.class
adding: StdRepartition$StdReducer.class
adding: ValueWrapper.class
hadoop jar StdRepartition.jar stdRepartition.StdRepartition input output
Exception in thread "main" java.lang.ClassNotFoundException: stdRepartition.StdRepartition
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.hadoop.util.RunJar.main(RunJar.java:205)
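(As a side note for diagnosing a ClassNotFoundException like this, the jar's internal layout can be listed with the standard jar tool; for the fully qualified name stdRepartition.StdRepartition to resolve, the class has to appear as stdRepartition/StdRepartition.class inside the jar, not at the root:

jar -tf StdRepartition.jar
)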
Code:
package stdRepartition;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
// The new-API FileSplit; casting the result of context.getInputSplit() to
// org.apache.hadoop.mapred.FileSplit (the old API) fails at runtime.
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
// import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StdRepartition {

    public static class DataMapper extends Mapper<Object, Text, IntWritable, ValueWrapper> {
        private Text flag = new Text();
        private Text content = new Text();
        private ValueWrapper valueWrapper = new ValueWrapper();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Tag each record with its source file so the reducer can tell the two tables apart.
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String filename = fileSplit.getPath().getName();
            int ID;
            if (filename.endsWith("data.txt")) {
                // "\\s+" splits on whitespace; the original "s+" split on the letter 's'.
                String[] parts = value.toString().split("\\s+");
                ID = Integer.parseInt(parts[0]);
                flag = new Text("data");
                content = value;
            } else {
                String[] parts = value.toString().split("\\|");
                ID = Integer.parseInt(parts[0]);
                flag = new Text("user");
                content = new Text(parts[2]);
            }
            valueWrapper.setFlag(flag);
            valueWrapper.setContent(content);
            context.write(new IntWritable(ID), valueWrapper);
        }
    }

    public static class StdReducer extends Reducer<IntWritable, ValueWrapper, NullWritable, Text> {
        private ArrayList<Text> ratings = new ArrayList<Text>();
        private Text age = new Text();

        public void reduce(IntWritable key, Iterable<ValueWrapper> value, Context context) throws IOException, InterruptedException {
            // The reducer instance is reused across keys, so per-key state must be
            // reset here; otherwise ratings from a previous key leak into this one.
            ratings.clear();
            age = new Text();
            for (ValueWrapper val : value) {
                Text flag = val.getFlag();
                if (flag.toString().equals("user")) {
                    // Copy the Text: Hadoop reuses the value object between iterations.
                    age = new Text(val.getContent());
                } else {
                    ratings.add(new Text(val.getContent()));
                }
            }
            String curAge = age.toString();
            for (Text r : ratings) {
                String curR = r.toString();
                curR = curR + " " + curAge;
                context.write(NullWritable.get(), new Text(curR));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "StdRepartition");
        job.setJarByClass(StdRepartition.class);
        job.setMapperClass(DataMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ValueWrapper.class);
        job.setReducerClass(StdReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, DataMapper.class);
        // MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, DataMapper.class);
        // Set the input path to be a directory.
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Answer 0 (score: 0)
I found the cause: the .class files have to be packaged into the jar from outside the package directory, so that the stdRepartition/ path is preserved inside the jar. Thanks for the help. I'd also like to know how to edit a post here.
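For illustration, one way to compile and package so the package path ends up inside the jar (assuming the .java files sit in the current directory and the hadoop command is on the PATH): compiling with -d . writes the class files into a stdRepartition/ subdirectory, and packaging that directory keeps the package structure.

javac -cp $(hadoop classpath) -d . StdRepartition.java ValueWrapper.java
jar -cvf StdRepartition.jar stdRepartition/*.class
hadoop jar StdRepartition.jar stdRepartition.StdRepartition input output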
Answer 1 (score: 0)
To execute a MapReduce program, you must go through the usual steps: compile the sources, package them into a jar (keeping the package directory structure), and then submit the job. Finally, execute the following command:
hadoop jar [fully qualified jar file name] [fully qualified driver class name] /[input path] /[output path]
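For example, a submission matching the jar from the question might look like this (the /user/hadoop/... paths are placeholders for real HDFS directories):

hadoop jar StdRepartition.jar stdRepartition.StdRepartition /user/hadoop/input /user/hadoop/output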
Here is a very simple and basic one: the Hello World in Map Reduce step-by-step guide.