Question

我正在用 Java 实现一个电影推荐系统，并且一直在参考这个网站：Link Here

输入： userId movieRatingCount,ratingSum,(movieId,movieRating)

17    1,3,(70,3)
35    1,1,(21,1)
49    3,7,(19,2 21,1 70,4)
87    2,3,(19,1 21,2)
98    1,2,(19,2)

代码：

def pairwise_items(self, user_id, values):
    """Yield every unordered pair of rated items for a single user.

    ``values`` is a 3-tuple ``(item_count, item_sum, ratings)``; only
    ``ratings`` is used. Each entry of ``ratings`` is indexed as
    ``entry[0]`` (item id) and ``entry[1]`` (rating).

    For each 2-combination of ``ratings`` (in input order) this yields
    ``((id_a, id_b), (rating_a, rating_b))``. A user with fewer than two
    ratings produces nothing.
    """
    _, _, ratings = values
    # NOTE: the original author flagged combinations() as the bottleneck.
    for first, second in combinations(ratings, 2):
        id_pair = (first[0], second[0])
        rating_pair = (first[1], second[1])
        yield id_pair, rating_pair

输出：firstMovieId, secondMovieId firstRating,secondRating

例如，对于userId 49，他观看了3部电影。输出将是

firstMovie, secondMovie firstMovieRatings, secondMovieRatings
firstMovie, thirdMovie firstMovieRatings, thirdMovieRatings
secondMovie, thirdMovie secondMovieRatings, thirdMovieRatings

对于只观看过 1 部电影的用户，将不产生任何输出。

是否可以将这段 Python 代码转换为 Java？我不知道 map 的输出键（key）和值（value）应该是什么，也不知道该如何解决这个问题。提前谢谢！

Answer 1

Mapper Logic：

假设输入的键和值以制表符分隔，例如 "49 3,7,(19,2 21,1 70,4)"。
在值中，它搜索 "("，并解析 "(" 和 ")" 之间的内容。
它以 (UserId, (movieId,movieRating) 列表) 的形式发出（键，值）。例如，对于记录 "49 3,7,(19,2 21,1 70,4)"，它发出键：49，值：19,2 21,1 70,4

Reducer 逻辑：

它按空格（" "）拆分值。例如，它将 "19,2 21,1 70,4" 拆分为 3 个字符串："19,2"、"21,1" 和 "70,4"。这些值将被添加到 ArrayList 中。
计算这些值的所有两两组合。
最后将这些组合写入输出。

以下是代码：

package com.myorg.hadooptests;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * MapReduce job that expands each user's list of rated movies into every
 * pairwise combination of (movieId, movieId) and (rating, rating).
 *
 * <p>Expected input record, one per line:
 * {@code userId<whitespace>ratingCount,ratingSum,(movieId,rating movieId,rating ...)}
 * for example {@code 49<TAB>3,7,(19,2 21,1 70,4)}.
 *
 * <p>Output record: key {@code firstMovieId,secondMovieId}, value
 * {@code firstRating,secondRating}. Users with fewer than two rated movies
 * produce no output.
 */
public class MovieGroupings {

    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "/in/in5.txt";

    /** Output path used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "/out/";

    /**
     * Emits (userId, "movieId,rating movieId,rating ...") for every well-formed
     * input line. Malformed lines are skipped rather than failing the task.
     */
    public static class MovieGroupingsMapper
            extends Mapper<LongWritable, Text , Text, Text>{

        // Reused across calls to avoid allocating two Text objects per record.
        private final Text userId = new Text();
        private final Text ratings = new Text();

        /**
         * Parses one input line and writes the user id together with the
         * text found between the first '(' and the last ')'.
         *
         * @param key     offset of the line in the input (unused)
         * @param value   the raw input line
         * @param context Hadoop context used to emit the (userId, ratings) pair
         */
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString().trim();

            // The user id is everything up to the first whitespace character.
            // Accepting any whitespace (not only a tab) keeps space-aligned
            // input from being dropped silently.
            int separator = 0;
            while (separator < line.length() && !Character.isWhitespace(line.charAt(separator))) {
                separator++;
            }
            if (separator == 0 || separator == line.length()) {
                return; // Blank line, or a key with no value part.
            }

            String payload = line.substring(separator).trim();

            // Locate the parenthesised rating list explicitly instead of
            // assuming the last character is ')': a record such as "1,3,("
            // would otherwise throw StringIndexOutOfBoundsException.
            int open = payload.indexOf('(');
            int close = payload.lastIndexOf(')');
            if (open == -1 || close <= open) {
                return;
            }

            userId.set(line.substring(0, separator));
            ratings.set(payload.substring(open + 1, close));
            context.write(userId, ratings);
        }
    }

    /**
     * For each rating list received for a user, emits every 2-combination of
     * its "movieId,rating" entries in input order.
     */
    public static class MovieGroupingsReducer
            extends Reducer<Text, Text, Text, Text> {

        // Reused across writes to avoid allocating two Text objects per pair.
        private final Text moviePair = new Text();
        private final Text ratingPair = new Text();

        /**
         * Splits each value on single spaces into "movieId,rating" entries and
         * writes one output record per unordered pair of valid entries.
         *
         * @param key     the user id (not written to the output)
         * @param values  rating lists emitted by the mapper for this user
         * @param context Hadoop context used to emit the pair records
         */
        public void reduce(Text key, Iterable<Text> values,
                           Context context) throws IOException, InterruptedException {

            for (Text value : values) {
                // Parse every entry once, keeping only those with exactly a
                // non-empty movie id and a non-empty rating. Validating each
                // entry on its own prevents two malformed entries whose fields
                // happen to total four from being emitted as a bogus pair.
                List<String[]> rated = new ArrayList<>();
                for (String entry : value.toString().split(" ")) {
                    String[] movieAndRating = entry.split(",");
                    if (movieAndRating.length == 2
                            && !movieAndRating[0].isEmpty()
                            && !movieAndRating[1].isEmpty()) {
                        rated.add(movieAndRating);
                    }
                }

                // With fewer than two entries the loops below emit nothing.
                for (int i = 0; i < rated.size(); i++) {
                    String[] first = rated.get(i);
                    for (int j = i + 1; j < rated.size(); j++) {
                        String[] second = rated.get(j);
                        moviePair.set(first[0] + "," + second[0]);
                        ratingPair.set(first[1] + "," + second[1]);
                        context.write(moviePair, ratingPair);
                    }
                }
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} is the input path and
     *             {@code args[1]} the output path; when omitted, the
     *             defaults {@code /in/in5.txt} and {@code /out/} are used
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {

        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "MovieGroupings");
        job.setJarByClass(MovieGroupings.class);
        job.setMapperClass(MovieGroupingsMapper.class);
        job.setReducerClass(MovieGroupingsReducer.class);
        job.setNumReduceTasks(5);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        System.exit(job.waitForCompletion(true) ? 0:1);
    }
}

对于以下输入：

17      1,3,(70,3)
35      1,1,(21,1)
49      3,7,(19,2 21,1 70,4)
87      2,3,(19,1 21,2)
98      1,2,(19,2)

产生的输出是：

19,21   2,1
19,70   2,4
21,70   1,4
19,21   1,2

Java Hadoop MapReduce多键值

1 个答案: