How to update a MapReduce job parameter from inside the Mapper

Date: 2015-06-24 08:35:31

Tags: java hadoop mapreduce

I want to update a parameter (which I set in the Driver class) while working inside the Mapper class.

I tried

context.getConfiguration().set("arg", "updatedvalue")

in the mapper. It does update the value, but the reducer's output is all zeros.

Please help.

Mapper:

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


public class RecMap extends Mapper<LongWritable, Text, Text, Text> {
    public static TreeMap<String,Integer> co_oc_mat=new TreeMap<String,Integer>();
    public static HashMap<String,Float> user_scoring_mat=new HashMap<String,Float>();
    public static TreeMap<String,Float> sorted_user_scoring_mat=new TreeMap<String,Float>();
    public static ArrayList<String> vals=new ArrayList<String>();
    public static ArrayList<Integer> unique_items=new ArrayList<Integer>();
    public static ArrayList<Integer> unique_users=new ArrayList<Integer>();
    public static int a=0;
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        ++a;
        String b=value.toString();
        vals.add(b);
        String[] parts=b.split("\\,");
        user_scoring_mat.put(parts[0]+","+parts[1], Float.parseFloat(parts[2]));
    }
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException{
        co_oc_mat.putAll(new get_co_oc_mat().get(vals, a));
        unique_users.addAll(new get_unique_users().get(vals, a));
        unique_items.addAll(new get_unique_items().get(vals, a));
        for(int i=0;i<unique_users.size();i++){
            for(int j=0;j<unique_items.size();j++){
                if(!user_scoring_mat.containsKey(unique_users.get(i)+","+unique_items.get(j))){
                    user_scoring_mat.put(unique_users.get(i)+","+unique_items.get(j), 0.0f);
                }
            }
        }
        sorted_user_scoring_mat.putAll(user_scoring_mat);
        String prev="null";int row_num=-1;String value="A";
        String prev2="null";int col_num=-1;String value2="B";

        //Transmitting co_oc_mat
        for(Entry<String, Integer> entry: co_oc_mat.entrySet()){
            String check_val=entry.getKey().split("\\,")[0];
            if(!prev.contentEquals(check_val)){
                if(row_num==-1){
                    prev=check_val;
                    ++row_num;
                }
                else{
                    for(int i=0;i<unique_users.size();i++){
                        String key=row_num+","+i;
                        context.write(new Text(key), new Text(value));
                    }
                    value="A";
                    prev=check_val;
                    ++row_num;
                }
            }
            value=value+","+entry.getValue();
        }
        for(int i=0;i<unique_users.size();i++){
            String key=row_num+","+i;
            context.write(new Text(key), new Text(value));
        }

        //Transmitting sorted_user_scoring_mat
        for(Entry<String, Float> entry: sorted_user_scoring_mat.entrySet()){
            //context.write(new Text(entry.getKey()), new Text(String.valueOf(entry.getValue())));
            String check_val=entry.getKey().split("\\,")[0];
            if(!prev2.contentEquals(check_val)){
                if(col_num==-1){
                    prev2=check_val;
                    ++col_num;
                }
                else{
                    for(int i=0;i<unique_items.size();i++){
                        String key=i+","+col_num;
                        context.write(new Text(key), new Text(value2));
                    }
                    value2="B";
                    prev2=check_val;
                    ++col_num;
                }
            }
            value2=value2+","+entry.getValue();
        }
        for(int i=0;i<unique_items.size();i++){
            String key=i+","+col_num;
            context.write(new Text(key), new Text(value2));
        }
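        // This is the update the question refers to: the reducer still sees the default.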
        context.getConfiguration().setInt("n", unique_items.size());
    }
}

Reducer:

import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class RecReduce extends
Reducer<Text, Text, Text, Text> {
    public static int n=0;
    @Override
    public void setup(Context context) throws IOException, InterruptedException{
        n=context.getConfiguration().getInt("n", 1);
    }
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String[] value;
        HashMap<Integer, Float> hashA = new HashMap<Integer, Float>();
        HashMap<Integer, Float> hashB = new HashMap<Integer, Float>();
        for (Text val : values) {
            value = val.toString().split(",");
            if (value[0].equals("A")) {
                for(int z=1;z<=n;z++){
                    hashA.put(z, Float.parseFloat(value[z]));}
            } else{
                for(int a=1;a<=n;a++){
                    hashB.put(a, Float.parseFloat(value[a]));}
            }
        }
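        // Dot product of the "A" row values and the "B" column values for this key.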
        float result = 0.0f;
        float a_ij;
        float b_jk;
        for (int j=1;j<=n;j++) {
            a_ij = hashA.containsKey(j) ? hashA.get(j) : 0.0f;
            b_jk = hashB.containsKey(j) ? hashB.get(j) : 0.0f;
            result +=a_ij*b_jk;
        }
        context.write(null, new Text(key.toString() + "," + Float.toString(result)));
    }
}

Driver:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class RecDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.setInt("n", 0);
        Job job = new Job(conf, "Recommendations_CollaborativeFiltering");
        job.setJarByClass(RecDriver.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(RecMap.class);
        //job.setNumReduceTasks(0);
        //Don't use combiner if there is no scope of combining the output. Otherwise the job will get stuck.
        //job.setCombinerClass(RecReduce.class);
        job.setReducerClass(RecReduce.class);

        FileInputFormat.addInputPath(job, new Path("/home/gts1/Desktop/recommendation.txt"));

        FileOutputFormat.setOutputPath(job, new Path("/home/gts1/Desktop/rec1_out"));
        System.exit(job.waitForCompletion(true)?0:1);
    }
}

This is the output I get:

0,0,0.0
0,1,0.0
0,2,0.0
0,3,0.0
0,4,0.0
1,0,0.0
1,1,0.0
1,2,0.0
1,3,0.0
1,4,0.0
2,0,0.0
2,1,0.0
2,2,0.0
2,3,0.0
2,4,0.0
3,0,0.0
3,1,0.0
3,2,0.0
3,3,0.0
3,4,0.0

1 Answer:

Answer (score: 1)

As the Hadoop API documentation says, JobContext provides "A read-only view of the job that is provided to the tasks while they are running." So you can get parameter values from the context inside mapper/reducer methods, but you cannot set them: a set() call only changes that task's local copy of the configuration and is never propagated back to the job or to other tasks, which is why your reducer still reads the default and produces zeros.
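In other words, the only place a job parameter like "n" can effectively be set is the driver, before the job is submitted. A minimal sketch of that one-way flow (the value 5 and the class name are only illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class ConfFlowSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // The value must be known here, before submission; it is shipped
            // to every map and reduce task as part of the read-only job context.
            conf.setInt("n", 5);
            Job job = new Job(conf, "conf-flow-sketch");
            // ... mapper/reducer/input/output configured as in RecDriver ...

            // Inside any task, context.getConfiguration().getInt("n", 1) returns 5.
            // Calling context.getConfiguration().set(...) in a task only changes
            // that task's local copy and is never seen by other tasks.
        }
    }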

When this kind of coordination is needed across processes running on different machines, you have to use Apache ZooKeeper: set the value from the mapper and read the same value back in the reducer.
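A rough sketch of that idea, using the plain ZooKeeper client. The connection string "zkhost:2181", the znode path "/recjob/n", and the helper class itself are assumptions made up for illustration, not part of the original code:

    import java.nio.charset.StandardCharsets;
    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.WatchedEvent;
    import org.apache.zookeeper.Watcher;
    import org.apache.zookeeper.ZooDefs;
    import org.apache.zookeeper.ZooKeeper;

    // Hypothetical helper: publish a value from the mapper, read it in the reducer.
    public class ZkJobParam {
        private static final String ZK_CONNECT = "zkhost:2181"; // assumed ensemble address
        private static final String NODE = "/recjob/n";          // assumed znode path

        private static ZooKeeper connect() throws Exception {
            return new ZooKeeper(ZK_CONNECT, 30000, new Watcher() {
                public void process(WatchedEvent event) { /* no-op watcher */ }
            });
        }

        // Call from the mapper's cleanup(): write the computed value to ZooKeeper.
        public static void publish(int n) throws Exception {
            ZooKeeper zk = connect();
            try {
                byte[] data = Integer.toString(n).getBytes(StandardCharsets.UTF_8);
                if (zk.exists(NODE, false) == null) {
                    zk.create(NODE, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                } else {
                    zk.setData(NODE, data, -1);
                }
            } finally {
                zk.close();
            }
        }

        // Call from the reducer: read the value back. A reducer's setup() can run
        // before every mapper has finished, so a real implementation would wait
        // for the znode to appear rather than fall back to the default immediately.
        public static int read(int defaultValue) throws Exception {
            ZooKeeper zk = connect();
            try {
                if (zk.exists(NODE, false) == null) {
                    return defaultValue;
                }
                byte[] data = zk.getData(NODE, false, null);
                return Integer.parseInt(new String(data, StandardCharsets.UTF_8));
            } finally {
                zk.close();
            }
        }
    }

Wired into the job above, RecMap#cleanup() would call something like ZkJobParam.publish(unique_items.size()) instead of setInt("n", ...), and RecReduce#setup() would call ZkJobParam.read(1).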