I ran some tests on MapReduce job performance.
I am convinced, and I understand why, that using a Combiner improves the job's performance. Please find the job statistics in the table below. In theory, parallel processing with multiple reducers should improve the job's runtime, but the statistics do not show that. Can anyone explain why multiple reducers did not improve the job's performance?
Counter                                      First run     Second run
CPU time spent (ms)                          27380         17280
Physical memory (bytes) snapshot             1430097920    491397120
Virtual memory (bytes) snapshot              7202680832    1753538560
Total committed heap usage (bytes)           502865920     291643392
Total time spent by all map tasks (ms)       61027         37374
Total time spent by all reduce tasks (ms)    273752        11416
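
For reference, both the combiner and the reducer count are controlled in the driver (full code below). A minimal sketch of the two settings being compared, assuming these are the lines that were toggled between the runs (the value 4 is only an example):

// Run the reducer logic as a combiner on each mapper's local output:
job.setCombinerClass(WordCountReducer.class);
// Ask for several reduce tasks instead of one:
job.setNumReduceTasks(4);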
Here is the code:
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text Words = new Text();
    private final static IntWritable one = new IntWritable(1);

    @Override
    public void map(LongWritable k1, Text v1, Context con) throws IOException, InterruptedException {
        // Split the line on spaces and emit (token, 1) for every token that contains "172".
        // The write happens only inside the if-block, so non-matching tokens emit nothing.
        String line = v1.toString();
        String[] tokens = line.split(" ");
        for (String token : tokens) {
            if (token.contains("172")) {
                Words.set(token);
                con.write(Words, one);
            }
        }
    }
}
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key1, Iterable<IntWritable> values1, Context context) throws IOException, InterruptedException {
        // Sum all counts emitted for this key (by the mappers, and by the combiner if it is enabled).
        int sum = 0;
        for (IntWritable val : values1) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key1, result);
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountJob extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountJob.class);

        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        //job.setCombinerClass(WordCountReducer.class);
        job.setNumReduceTasks(1);

        // Location and format of the input file.
        FileInputFormat.addInputPath(job, in);
        job.setInputFormatClass(TextInputFormat.class);

        // Location and format of the output file.
        FileOutputFormat.setOutputPath(job, out);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Types of the key/value pairs written to the output file.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] pars) throws Exception {
        int output = ToolRunner.run(new Configuration(), new WordCountJob(), pars);
        System.exit(output);
    }
}
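
Note that the hard-coded job.setNumReduceTasks(1) call in run() overrides whatever is in the configuration. If that call is removed, the reducer count can instead be supplied at submission time through ToolRunner's generic options; a hypothetical invocation (jar name and paths are placeholders):

hadoop jar wordcount.jar WordCountJob -D mapreduce.job.reduces=4 /input/path /output/path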