I am stuck at a point where I need to chain:
Mapper >> Reducer >> Reducer
This is my data:
Dpt.csv
EmpNo1,DeptNo1
EmpNo2,DeptNo2
EmpNo3,DeptNo1
EmpNo4,DeptNo2
EmpNo5,DeptNo2
EmpNo6,DeptNo1
Emp.csv
EmpNo1,10000
EmpNo2,4675432
EmpNo3,76568658
EmpNo4,241423
EmpNo5,75756
EmpNo6,9796854
In the end, I want something like this:
Dept_1 >> Total_Salary_Dept_1
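For the sample data above, that would be:
Dept_1 >> 86375512 (10000 + 76568658 + 9796854, from EmpNo1, EmpNo3, EmpNo6)
Dept_2 >> 4992611 (4675432 + 241423 + 75756, from EmpNo2, EmpNo4, EmpNo5)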
One major issue is that my first reducer is not being called when I use multiple files as input.
The second issue is that I am not able to pass that reducer's output on to the next reducer (ChainReducer cannot chain two reducers).
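The only workaround I can think of is to split this into two separate jobs, with the first job's output directory feeding the second job. Here is a minimal sketch of what I mean, reusing the classes and variables from my code below; the intermediate directory ./joined and the use of KeyValueTextInputFormat are my own assumptions, not something from the references:

    // Sketch only: two chained Jobs instead of one chained job.
    // Job 1: join the EmpNo records from both files into (DeptNo, Salary) pairs.
    Job join = Job.getInstance(conf, "DeptSalaryJoin");
    join.setJarByClass(getClass());
    join.setMapperClass(CollectionMapper.class);
    join.setReducerClass(DeptSalaryJoiner.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(join, new Path(input1));
    FileInputFormat.addInputPath(join, new Path(input2));
    FileOutputFormat.setOutputPath(join, new Path("./joined")); // hypothetical intermediate dir
    if (!join.waitForCompletion(true)) return 1;

    // Job 2: re-read the tab-separated (DeptNo, Salary) lines and aggregate.
    // KeyValueTextInputFormat splits each line on the first tab, so the default
    // identity mapper already delivers (Text, Text) pairs to the reducer.
    // Needs: import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
    Job agg = Job.getInstance(conf, "SalaryAggregate");
    agg.setJarByClass(getClass());
    agg.setInputFormatClass(KeyValueTextInputFormat.class);
    agg.setReducerClass(SalaryAggregator.class);
    agg.setMapOutputKeyClass(Text.class);
    agg.setMapOutputValueClass(Text.class);
    agg.setOutputKeyClass(Text.class);
    agg.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(agg, new Path("./joined"));
    FileOutputFormat.setOutputPath(agg, new Path(output));
    return agg.waitForCompletion(true) ? 0 : 1;

But that writes the intermediate join to disk, which is exactly what I hoped chaining would avoid.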
I was using this as a reference, but soon realized that it would not help.
I found this link, where the author says in one of the comments: "In the Hadoop 2.X series, internally, you can chain mappers before the reducer with ChainMapper, and chain mappers after the reducer with ChainReducer."
Does this mean I would have a structure like this:
ChainMapper (Mapper 1) -> ChainReducer (Reducer 1) -> ChainMapper (an unnecessary mapper) -> ChainReducer (Reducer 2)
If that is the case, how exactly does the data get passed from Reducer 1 to Mapper 2?
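For reference, the ChainMapper/ChainReducer javadocs describe a single chained job as [MAP+ / REDUCE MAP*]: any number of mappers, at most one reducer set via ChainReducer.setReducer, and after that only mappers added via ChainReducer.addMapper, with each element's output piped in memory to the next. So as far as I can tell, the only shape the API permits here is something like the sketch below, where PostReduceMapper is a hypothetical mapper of mine, not a class from my code:

    // [MAP+ / REDUCE MAP*]: mappers, then ONE reducer, then more mappers only.
    ChainMapper.addMapper(job, CollectionMapper.class,
            LongWritable.class, Text.class, Text.class, Text.class, new Configuration(false));
    ChainReducer.setReducer(job, DeptSalaryJoiner.class,
            Text.class, Text.class, Text.class, Text.class, new Configuration(false));
    // No second setReducer is possible; only mappers can follow the reducer:
    ChainReducer.addMapper(job, PostReduceMapper.class,
            Text.class, Text.class, Text.class, IntWritable.class, new Configuration(false));

A chained mapper cannot re-group records by key, though, so this does not give me the second shuffle that a second reducer would need.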
Can someone please help me out with this?
Here is my code so far.
Thanks.
package Aggregate;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Sales extends Configured implements Tool {

    // Emits (field0, field1) for every input line: (EmpNo, DeptNo) from Dpt.csv
    // and (EmpNo, Salary) from Emp.csv, so both records meet at the same key.
    public static class CollectionMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] vals = value.toString().split(",");
            context.write(new Text(vals[0]), new Text(vals[1]));
        }
    }
    // For each EmpNo, collects its DeptNo and Salary and emits (DeptNo, Salary).
    // Note: this assumes exactly two values per key and relies on their arrival
    // order, which MapReduce does not guarantee.
    public static class DeptSalaryJoiner extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            ArrayList<String> deptSal = new ArrayList<>();
            for (Text val : values) {
                deptSal.add(val.toString());
            }
            context.write(new Text(deptSal.get(0)), new Text(deptSal.get(1)));
        }
    }
    // Sums all salaries for a department and emits (DeptNo, TotalSalary).
    public static class SalaryAggregator extends Reducer<Text, Text, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int totalSal = 0;
            for (Text val : values) {
                totalSal += Integer.parseInt(val.toString());
            }
            context.write(key, new IntWritable(totalSal));
        }
    }
    public static void main(String[] args) throws Exception {
        int exitFlag = ToolRunner.run(new Sales(), args);
        System.exit(exitFlag);
    }

    @Override
    public int run(String[] args) throws Exception {
        String input1 = "./emp.csv";
        String input2 = "./dept.csv";
        String output = "./DeptAggregate";

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Sales");
        job.setJarByClass(getClass());

        Configuration mapConf = new Configuration(false);
        ChainMapper.addMapper(job, CollectionMapper.class,
                LongWritable.class, Text.class, Text.class, Text.class, mapConf);

        Configuration reduce1Conf = new Configuration(false);
        ChainReducer.setReducer(job, DeptSalaryJoiner.class,
                Text.class, Text.class, Text.class, Text.class, reduce1Conf);

        // This is the part that fails: a chain allows only ONE reducer, so this
        // second setReducer call is rejected instead of being appended.
        Configuration reduce2Conf = new Configuration(false);
        ChainReducer.setReducer(job, SalaryAggregator.class,
                Text.class, Text.class, Text.class, IntWritable.class, reduce2Conf);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(input1));
        FileInputFormat.addInputPath(job, new Path(input2));

        // Delete any previous local output directory before the job runs.
        try {
            File f = new File(output);
            FileUtils.forceDelete(f);
        } catch (Exception e) {
            // output directory was not there; nothing to delete
        }
        FileOutputFormat.setOutputPath(job, new Path(output));

        return job.waitForCompletion(true) ? 0 : 1;
    }
}