Why is my Hadoop cluster so slow?

Date: 2015-12-17 20:36:43

Tags: hadoop, dictionary, mapreduce, reduce

I am running a matrix-multiplication MapReduce job on Hadoop 1.0.3. The system contains 16 data nodes: a master and 15 slaves, all of them virtual machines in a cloud, each with 16 GB of RAM and a 150 GB HDD.

Here is the mapper:

import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OneStepMatrixMultiplication {

    /******************* Mapper class **********************/
    // key   = byte offset of the line (LongWritable)
    // value = the text of the line, e.g. "A,i,j,value" or "B,j,k,value"
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();

            int m = Integer.parseInt(conf.get("m"));
            int p = Integer.parseInt(conf.get("p"));
            String line = value.toString();
            String[] indicesAndValue = line.split(",");
            Text outputKey = new Text();
            Text outputValue = new Text();
            if (indicesAndValue[0].equals("A")) {
                // A(i,j) contributes to every output cell (i,k), so emit it p times.
                for (int k = 0; k < p; k++) {
                    outputKey.set(indicesAndValue[1] + "," + k);
                    outputValue.set("A," + indicesAndValue[2] + "," + indicesAndValue[3]);
                    context.write(outputKey, outputValue);
                }
            } else {
                // B(j,k) contributes to every output cell (i,k), so emit it m times.
                for (int i = 0; i < m; i++) {
                    outputKey.set(i + "," + indicesAndValue[2]);
                    outputValue.set("B," + indicesAndValue[1] + "," + indicesAndValue[3]);
                    context.write(outputKey, outputValue);
                }
            }
        }
    }
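To make the mapper's output concrete, here is a sketch of the data flow, assuming the comma-separated input format the code implies (matrix name, row index, column index, value):

    Input line (an entry of A):   A,0,1,5.0
    With p = 3 the mapper emits:  ("0,0", "A,1,5.0"), ("0,1", "A,1,5.0"), ("0,2", "A,1,5.0")

Every entry of A is emitted p times and every entry of B m times, so with m = p = 3000 (the values set in the driver below) each input record produces 3000 map-output records, and the shuffle carries roughly 3000 times the input volume.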

Here is the reducer:

    /************************* Reducer class *************************************/
    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            String[] value;
            HashMap<Integer, Float> hashA = new HashMap<Integer, Float>();
            HashMap<Integer, Float> hashB = new HashMap<Integer, Float>();
            // Separate the values for this cell into the row of A and the column of B.
            for (Text val : values) {
                value = val.toString().split(",");
                if (value[0].equals("A")) {
                    hashA.put(Integer.parseInt(value[1]), Float.parseFloat(value[2]));
                } else {
                    hashB.put(Integer.parseInt(value[1]), Float.parseFloat(value[2]));
                }
            }

            // Debug output: prints every A value collected for this cell.
            for (Float value1 : hashA.values()) {
                System.out.println("Value of A = " + value1);
            }

            // Dot product of row i of A and column k of B.
            int n = Integer.parseInt(context.getConfiguration().get("n"));
            float result = 0.0f;
            float a_ij;
            float b_jk;
            for (int j = 0; j < n; j++) {
                a_ij = hashA.containsKey(j) ? hashA.get(j) : 0.0f;
                System.out.println(a_ij);
                b_jk = hashB.containsKey(j) ? hashB.get(j) : 0.0f;
                System.out.println(b_jk);
                result += a_ij * b_jk;
            }
            if (result != 0.0f) {
                // Null key: TextOutputFormat then writes only the value "i,k,result".
                context.write(null, new Text(key.toString() + "," + Float.toString(result)));
            }
        }
    }
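Each reduce call thus receives all values for one output cell (i,k) and computes the dot product

    result(i,k) = sum over j = 0 .. n-1 of A(i,j) * B(j,k)

For example, for key "0,2" with n = 3 this is A(0,0)*B(0,2) + A(0,1)*B(1,2) + A(0,2)*B(2,2). Note that the two System.out.println calls inside that loop run 2n = 6000 times per output cell and are written to the task logs, which adds noticeable I/O to every reduce call.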

Here is the main (driver):

    /*********************** Driver (main) function ***********************************/
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A is an m-by-n matrix; B is an n-by-p matrix.
        conf.set("m", "3000");
        conf.set("n", "3000");
        conf.set("p", "3000");

        Job job = Job.getInstance(conf, "MatrixMatrixMultiplicationOneStep");
        job.setJarByClass(OneStepMatrixMultiplication.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // submit() returns immediately; it does not wait for the job to finish.
        job.submit();
    }
}
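One detail worth flagging in the driver: submit() returns without blocking, so the more common idiom for watching progress from the console is waitForCompletion(true). A minimal sketch of that idiom, which also turns on map-output compression given the shuffle volume this job produces (the compression key is the standard Hadoop 1.x property name; it is not in the original post):

    // Sketch only: block on the job and report progress, with compressed map output.
    conf.set("mapred.compress.map.output", "true");  // Hadoop 1.x key for map-output compression
    Job job = Job.getInstance(conf, "MatrixMatrixMultiplicationOneStep");
    // ... same mapper/reducer/format/path setup as above ...
    System.exit(job.waitForCompletion(true) ? 0 : 1);  // blocks and prints progress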

When I start the program, the mapper is very slow: it gets through barely 1% in 5 minutes or more! The conf files have replication = 16 and a maximum heap of 6000 MB.

How can I speed the run up?!! Or is this a normal amount of time? Note: the input file is close to 115 MB.
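For reference, the two settings mentioned above correspond to the following Hadoop 1.x property names; this is only a sketch of where they live, with illustrative values (note that replication = 16 on a 16-node cluster means every HDFS block is copied to every node):

    // Hadoop 1.x keys for the settings mentioned above (values are illustrative).
    Configuration conf = new Configuration();
    conf.set("dfs.replication", "3");                 // HDFS block replication; the post uses 16
    conf.set("mapred.child.java.opts", "-Xmx6000m");  // per-task maximum heap, as in the post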

You can take a look at the mapper code above.
