如何在 MapReduce(Java)中将输入文件的内容存储到数组中

时间:2013-12-04 14:26:44

标签: hadoop mapreduce

我用 Java 编写了一个线性回归程序。输入数据(每条记录为 x,y)如下:2,21.05 3,23.51 4,24.23 5,27.71 6,30.86 8,45.85 10,52.12 11,55.98

在 reduce 任务处理输入之前,我希望先把输入存储到数组中,例如 x[] = {2,3,...,11},然后把该数组变量传给 reduce() 函数。但在我的程序中,我只能得到单个的值。

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;






public class LinearRegression {


     /**
      * Parses one {@code x,y} text record per input line and emits it as a
      * {@link CountRegression} keyed by the textual x value.
      *
      * <p>Blank lines are ignored. Lines that do not contain two comma-separated
      * numeric fields are skipped and counted under the
      * {@code LinearRegression / MALFORMED_RECORDS} job counter instead of
      * failing the whole task.
      *
      * <p>NOTE(review): because the output key is the x value, a reduce group only
      * ever contains records that share the same x. If the regression is meant to
      * see the whole data set in one reduce() call, a single constant key would be
      * needed here - confirm the intended grouping.
      */
     public static class RegressionMapper extends
             Mapper<LongWritable, Text, Text, CountRegression> {

         private static final String COUNTER_GROUP = "LinearRegression";
         private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

         // Both writables are reused for every record to avoid per-record allocation.
         private final Text id = new Text();
         private final CountRegression countRegression = new CountRegression();

         /**
          * Converts a single input line into an (x, record) pair.
          *
          * @param key     position of the line in the input split (unused)
          * @param value   the raw line, expected to look like {@code 2,21.05}
          * @param context sink for the emitted pair and for the malformed-record counter
          * @throws IOException          if writing the output pair fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         public void map(LongWritable key, Text value, Context context)
                 throws IOException, InterruptedException {

             String line = value.toString().trim();
             if (line.isEmpty()) {
                 // Nothing to parse; blank lines are not treated as errors.
                 return;
             }

             String[] inputData = line.split(",");
             if (inputData.length < 2) {
                 context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                 return;
             }

             String xVal = inputData[0].trim();
             String yVal = inputData[1].trim();

             // Parse into locals first so the reused writable is never left
             // half-updated when the second field turns out to be invalid.
             int x;
             float y;
             try {
                 x = Integer.parseInt(xVal);
                 y = Float.parseFloat(yVal);
             } catch (NumberFormatException e) {
                 context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                 return;
             }

             countRegression.setxVal(x);
             countRegression.setyVal(y);
             id.set(xVal);
             context.write(id, countRegression);
         }
     }

public static class RegressionReducer extends
     Reducer<Text, CountRegression, Text, CountRegression> {
    private CountRegression result = new CountRegression();
//  static float meanX = 0;
//  private float xValues[];
//  private  float yValues[];
    static float xRed = 0.0f;
    static float yRed = 0.3f;
    static float sum = 0;
    static ArrayList<Float> list = new ArrayList<Float>();   
    public void reduce(Text key, Iterable<CountRegression> values,
            Context context) throws IOException, InterruptedException {


        //float b = 0;

//        while(values.iterator().hasNext())
//        {
//          xRed = xRed + values.iterator().next().getxVal();
//          yRed = yRed + values.iterator().next().getyVal();
//        }
        for (CountRegression val : values) {

            list.add(val.getxVal());
//          list.add(val.getyVal());
//          xRed +=  val.getxVal();

//            yRed = val.getyVal();
//          meanX += val.getxVal();
            //xValues = val.getxVal();        
        }

        for (int i=0; i< list.size(); i++) {
            int lastIndex = list.listIterator().previousIndex();
              sum += list.get(lastIndex);
        }

        result.setxVal(sum);
        result.setyVal(yRed);

        context.write(key, result);
    }

}

/**
 * Writable pair of floats exchanged between the map and reduce phases.
 *
 * <p>The wire format is two consecutive floats: first {@code xVal}, then
 * {@code yVal}. {@link #write} and {@link #readFields} must keep that order
 * in sync.
 */
public static class CountRegression implements Writable {

    // Both components start at the float default of zero.
    private float xVal;
    private float yVal;

    /** @return the current x component */
    public float getxVal() {
        return this.xVal;
    }

    /** @param newX value to store as the x component */
    public void setxVal(float newX) {
        xVal = newX;
    }

    /** @return the current y component */
    public float getyVal() {
        return this.yVal;
    }

    /** @param newY value to store as the y component */
    public void setyVal(float newY) {
        yVal = newY;
    }

    /**
     * Restores both components from the stream, x first and y second.
     *
     * @param in stream positioned at a record produced by {@link #write}
     * @throws IOException if either float cannot be read
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.xVal = in.readFloat();
        this.yVal = in.readFloat();
    }

    /**
     * Serializes both components to the stream, x first and y second.
     *
     * @param out destination stream
     * @throws IOException if either float cannot be written
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(this.xVal);
        out.writeFloat(this.yVal);
    }

    /**
     * Renders the pair in the textual form {@code y = <xVal> +<yVal> x}.
     *
     * @return the formatted line
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("y = ");
        text.append(xVal);
        text.append(" +");
        text.append(yVal);
        text.append(" x");
        return text.toString();
    }
}

/**
 * Configures and runs the LinearRegression job.
 *
 * <p>Expects exactly two arguments: the input path and the output path.
 * When they are missing, a usage line is printed to standard error and the
 * JVM exits with status 2 instead of failing with an
 * ArrayIndexOutOfBoundsException. Otherwise the JVM exits with status 0 if
 * the job completes successfully and 1 if it does not.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {

    if (args.length < 2) {
        System.err.println("Usage: LinearRegression <input path> <output path>");
        System.exit(2);
    }

    // Provides access to configuration parameters.
    Configuration conf = new Configuration();

    // The Job object is used to configure the job, submit it, control its
    // execution, and query its state.
    Job job = new Job(conf);

    // Set the user-specified job name.
    job.setJobName("LinearRegression");

    // Set the jar by finding where the given class came from.
    job.setJarByClass(LinearRegression.class);

    // Set the Mapper for the job.
    job.setMapperClass(RegressionMapper.class);

    // RegressionReducer is registered as both combiner and reducer, so its
    // reduce() may be applied to its own output before the final reduce.
    job.setCombinerClass(RegressionReducer.class);
    job.setReducerClass(RegressionReducer.class);

    // Key/value types written by the job.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CountRegression.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

0 个答案:

没有答案