如何将MapReduce的输出用于另一个MapReduce程序

时间:2017-09-26 04:25:19

标签: java hadoop mapreduce

我是Hadoop新手,正在实现一个电影推荐系统。我需要把一个MapReduce程序的输出用作另一个MapReduce程序的输入,但不知道该怎么做。我目前的做法是:编写两份驱动程序代码,各自带有自己的Map和Reduce类;先运行第一个驱动程序,把它的输出保存到HDFS,然后再把该输出文件作为输入传给第二个驱动程序。下面是我的代码:MRS是第一个MapReduce作业,MRS2是第二个。

imports*

public class MRS {
public static class Map extends
        Mapper<LongWritable, Text, IntWritable, Text> {
    public void map(LongWritable key, Text value, Context con)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] s = line.split("\t");
        StringTokenizer token = new StringTokenizer(line);

        while (token.hasMoreTokens()) {
            IntWritable userId = new IntWritable(Integer.parseInt(token
                    .nextToken()));
            String movieId = token.nextToken();
            String ratings = token.nextToken();
            token.nextToken();
            con.write(userId, new Text(movieId + "," + ratings));
        }

    }
}

public static class Reduce extends
        Reducer<IntWritable, Text, IntWritable, Text> {
    public void reduce(IntWritable key, Iterable<Text> value, Context con)
            throws IOException, InterruptedException {
        int item_count = 0;
        int item_sum = 0;
        String result = "";
        for (Text t : value) {
            String s = t.toString();
            StringTokenizer token = new StringTokenizer(s, ",");

            result = result + " " + s + " ";

        }
        result = result.substring(0, result.length() - 1);


        con.write(key, new Text(result.trim()));
    }
}

public static void main(String[] args) throws IOException,
        ClassNotFoundException, InterruptedException {
    Configuration con = new Configuration();
    Job job = new Job(con, "Movie Recommendation");

    job.setJarByClass(MRS.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

}


public class MRS2 {

public static class Map extends Mapper<LongWritable, Text, Text, Text> {

    Text movieSet = new Text();
    Text ratingSet = new Text();

    public void map(LongWritable key, Text value, Context con)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] split = null;
        StringTokenizer token = new StringTokenizer(line, "\t");
        token.nextToken();
        String s = token.nextToken();
        token = new StringTokenizer(s, " ");
        HashMap<String, String> hmap = new HashMap<String, String>();

        while (token.hasMoreTokens()) {
            split = token.nextToken().split(",");
            hmap.put(split[0], split[1]);

        }
        Set<String> set = hmap.keySet();
        for (String s1 : set) {
            for (String s2 : set) {
                if (Integer.parseInt(s1) < Integer.parseInt(s2)) {
                    movieSet.set(s1 + "," + s2);
                    ratingSet.set(hmap.get(s1) + "," + hmap.get(s2));

                    con.write(movieSet, ratingSet);
                }
            }
        }
    }
}

public static class Reduce extends Reducer<Text, Text, Text, Text> {
    public void reduce(Text key, Iterable<Text> value, Context con)
            throws IOException, InterruptedException {
        long sum_xy = 0;
        long sum_xx = 0;
        long sum_yy = 0;
        for (Text t : value) {
            String s = t.toString();
            StringTokenizer token = new StringTokenizer(s, ",");
            int s1 = Integer.parseInt(token.nextToken());
            int s2 = Integer.parseInt(token.nextToken());
            sum_xy = sum_xy + s1 * s2;
            sum_xx = sum_xx + s1 * s1;
            sum_yy = sum_yy + s2 * s2;
        }
        double similarity = 0.0;
        similarity = sum_xy / (Math.sqrt(sum_xx) * Math.sqrt(sum_yy));

        con.write(key, new Text(String.valueOf(similarity)));
    }
}

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration con = new Configuration();
    Job job = new Job(con, "Movie Recommendation");

    job.setJarByClass(MRS2.class);

    job.setMapperClass(Map.class);

    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

}

所以,我想知道如何在同一个驱动程序中依次运行这两个作业,让第一个作业的输出直接作为第二个作业的输入。

0 个答案:

没有答案