我是 Hadoop 新手,正在实现一个电影推荐系统。我需要把一个 MapReduce 程序的输出作为另一个 MapReduce 程序的输入,但不知道该怎么做。我目前的做法是:编写两个驱动程序,各自带有自己的 Map 和 Reduce 类;先运行第一个驱动程序,将其输出保存到 HDFS,然后把该输出文件作为输入传给第二个驱动程序。下面是我的代码:MRS 是第一个 MapReduce 作业,MRS2 是第二个。
imports*
public class MRS {
public static class Map extends
Mapper<LongWritable, Text, IntWritable, Text> {
public void map(LongWritable key, Text value, Context con)
throws IOException, InterruptedException {
String line = value.toString();
String[] s = line.split("\t");
StringTokenizer token = new StringTokenizer(line);
while (token.hasMoreTokens()) {
IntWritable userId = new IntWritable(Integer.parseInt(token
.nextToken()));
String movieId = token.nextToken();
String ratings = token.nextToken();
token.nextToken();
con.write(userId, new Text(movieId + "," + ratings));
}
}
}
public static class Reduce extends
Reducer<IntWritable, Text, IntWritable, Text> {
public void reduce(IntWritable key, Iterable<Text> value, Context con)
throws IOException, InterruptedException {
int item_count = 0;
int item_sum = 0;
String result = "";
for (Text t : value) {
String s = t.toString();
StringTokenizer token = new StringTokenizer(s, ",");
result = result + " " + s + " ";
}
result = result.substring(0, result.length() - 1);
con.write(key, new Text(result.trim()));
}
}
public static void main(String[] args) throws IOException,
ClassNotFoundException, InterruptedException {
Configuration con = new Configuration();
Job job = new Job(con, "Movie Recommendation");
job.setJarByClass(MRS.class);
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
public class MRS2 {
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
Text movieSet = new Text();
Text ratingSet = new Text();
public void map(LongWritable key, Text value, Context con)
throws IOException, InterruptedException {
String line = value.toString();
String[] split = null;
StringTokenizer token = new StringTokenizer(line, "\t");
token.nextToken();
String s = token.nextToken();
token = new StringTokenizer(s, " ");
HashMap<String, String> hmap = new HashMap<String, String>();
while (token.hasMoreTokens()) {
split = token.nextToken().split(",");
hmap.put(split[0], split[1]);
}
Set<String> set = hmap.keySet();
for (String s1 : set) {
for (String s2 : set) {
if (Integer.parseInt(s1) < Integer.parseInt(s2)) {
movieSet.set(s1 + "," + s2);
ratingSet.set(hmap.get(s1) + "," + hmap.get(s2));
con.write(movieSet, ratingSet);
}
}
}
}
}
public static class Reduce extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> value, Context con)
throws IOException, InterruptedException {
long sum_xy = 0;
long sum_xx = 0;
long sum_yy = 0;
for (Text t : value) {
String s = t.toString();
StringTokenizer token = new StringTokenizer(s, ",");
int s1 = Integer.parseInt(token.nextToken());
int s2 = Integer.parseInt(token.nextToken());
sum_xy = sum_xy + s1 * s2;
sum_xx = sum_xx + s1 * s1;
sum_yy = sum_yy + s2 * s2;
}
double similarity = 0.0;
similarity = sum_xy / (Math.sqrt(sum_xx) * Math.sqrt(sum_yy));
con.write(key, new Text(String.valueOf(similarity)));
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration con = new Configuration();
Job job = new Job(con, "Movie Recommendation");
job.setJarByClass(MRS2.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
所以,我想知道如何在同一个驱动程序中依次运行这两个作业,让第一个作业的输出直接作为第二个作业的输入。