我在java中编写了线性回归程序。 输入是 - > 2,21.05 3,23.51 4,24.23 5,27.71 6,30.86 8,45.85 10,52.12 11,55.98
在处理输入以减少任务之前,我希望在数组中存储输入,如x [] = {2,3,... 11}。然后将该数组变量发送到reduce()函数 但我只是在我的计划中获得价值。
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class LinearRegression {
public static class RegressionMapper extends
Mapper<LongWritable, Text, Text, CountRegression> {
private Text id = new Text();
private CountRegression countRegression = new CountRegression();
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String tempString = value.toString();
String[] inputData = tempString.split(",");
String xVal = inputData[0];
String yVal = inputData[1];
countRegression.setxVal(Integer.parseInt(xVal));
countRegression.setyVal(Float.parseFloat(yVal));
id.set(xVal);
context.write(id, countRegression);
}
}
public static class RegressionReducer extends
Reducer<Text, CountRegression, Text, CountRegression> {
private CountRegression result = new CountRegression();
// static float meanX = 0;
// private float xValues[];
// private float yValues[];
static float xRed = 0.0f;
static float yRed = 0.3f;
static float sum = 0;
static ArrayList<Float> list = new ArrayList<Float>();
public void reduce(Text key, Iterable<CountRegression> values,
Context context) throws IOException, InterruptedException {
//float b = 0;
// while(values.iterator().hasNext())
// {
// xRed = xRed + values.iterator().next().getxVal();
// yRed = yRed + values.iterator().next().getyVal();
// }
for (CountRegression val : values) {
list.add(val.getxVal());
// list.add(val.getyVal());
// xRed += val.getxVal();
// yRed = val.getyVal();
// meanX += val.getxVal();
//xValues = val.getxVal();
}
for (int i=0; i< list.size(); i++) {
int lastIndex = list.listIterator().previousIndex();
sum += list.get(lastIndex);
}
result.setxVal(sum);
result.setyVal(yRed);
context.write(key, result);
}
}
public static class CountRegression implements Writable {
private float xVal = 0;
private float yVal = 0;
public float getxVal() {
return xVal;
}
public void setxVal(float x) {
this.xVal = x;
}
public float getyVal() {
return yVal;
}
public void setyVal(float y) {
this.yVal = y;
}
@Override
public void readFields(DataInput in) throws IOException {
xVal = in.readFloat();
yVal = in.readFloat();
}
@Override
public void write(DataOutput out) throws IOException {
out.writeFloat(xVal);
out.writeFloat(yVal);
}
@Override
public String toString() {
return "y = "+xVal+" +"+yVal+" x" ;
}
}
public static void main(String[] args) throws Exception {
// Provides access to configuration parameters.
Configuration conf = new Configuration();
// Create a new Job It allows the user to configure the job, submit it, control its execution, and query the state.
Job job = new Job(conf);
//Set the user-specified job name.
job.setJobName("LinearRegression");
//Set the Jar by finding where a given class came from.
job.setJarByClass(LinearRegression.class);
// Set the Mapper for the job.
job.setMapperClass(RegressionMapper.class);
// Set the Combiner for the job.
job.setCombinerClass(RegressionReducer.class);
// Set the Reducer for the job.
job.setReducerClass(RegressionReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(CountRegression.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}