I am new to MapReduce and would really appreciate your feedback. I have a text file that contains data in the following format -
State1 County1 Students#(Integer) Teacher#(Integer) Classrooms#(Integer)
State1 County2 Students#(Integer) Teacher#(Integer) Classrooms#(Integer)
State1 County3 Students#(Integer) Teacher#(Integer) Classrooms#(Integer)
State2 County1 Students#(Integer) Teacher#(Integer) Classrooms#(Integer)
State2 County2 Students#(Integer) Teacher#(Integer) Classrooms#(Integer)
State2 County3 Students#(Integer) Teacher#(Integer) Classrooms#(Integer)
I wrote a MapReduce job that gives me the following output -
State1 Total<Students#>
State2 Total<Students#>
I need the same done for the Teacher and Classrooms counts as well - the final output needs to be:
State1 Total<Students#> Total<Teacher#> Total<Classrooms#>
State2 Total<Students#> Total<Teacher#> Total<Classrooms#>
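For example (the numbers below are made up purely for illustration), input such as

State1 County1 Students#10 Teacher#2 Classrooms#1
State1 County2 Students#20 Teacher#3 Classrooms#2
State2 County1 Students#15 Teacher#2 Classrooms#1

should end up as

State1 30 5 3
State2 15 2 1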
Answer 0 (score: 1)
You can also try the following:
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MultiColSumDemo extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new MultiColSumDemo(), args);
    }

    @Override
    public int run(String[] arg0) throws Exception {
        // Split each input line at the first space: the state becomes the key,
        // the rest ("CountyX Students#n Teacher#n Classrooms#n") becomes the value.
        getConf().set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
        Job job = Job.getInstance(getConf());
        job.setJobName("MultiColSumDemo");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(MultiColMapper.class);
        job.setReducerClass(MultiColReduce.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("input/sum_multi_col"));
        FileOutputFormat.setOutputPath(job, new Path("sum_multi_col_output" + System.currentTimeMillis()));
        job.setJarByClass(MultiColSumDemo.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    // Mapper and reducer are static nested classes so Hadoop can instantiate them.
    public static class MultiColMapper extends Mapper<Text, Text, Text, Text> {

        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // key = state, value = rest of the line; just pass the pair through
            context.write(key, value);
        }
    }

    public static class MultiColReduce extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // one running total per column name; LinkedHashMap keeps the columns
            // in the order they first appear (Students, Teacher, Classrooms)
            LinkedHashMap<String, Integer> sumCol = new LinkedHashMap<>();
            Iterator<Text> it = values.iterator();
            while (it.hasNext()) {
                StringTokenizer st = new StringTokenizer(it.next().toString(), " ");
                st.nextToken(); // skip the county name
                while (st.hasMoreTokens()) {
                    // each remaining token looks like "Students#123"
                    String[] name = st.nextToken().split("#");
                    if (sumCol.get(name[0]) == null)
                        sumCol.put(name[0], Integer.parseInt(name[1]));
                    else
                        sumCol.put(name[0], sumCol.get(name[0]) + Integer.parseInt(name[1]));
                }
            }
            StringBuilder sb = new StringBuilder();
            for (Entry<String, Integer> val : sumCol.entrySet())
                sb.append(val.getKey()).append('#').append(val.getValue()).append(' ');
            context.write(key, new Text(sb.toString().trim()));
        }
    }
}
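A note on how this works: KeyValueTextInputFormat together with the separator setting splits each line at the first space, so the state arrives in the mapper as the key and the rest of the line ("County1 Students#n Teacher#n Classrooms#n") as the value. The mapper is therefore a plain pass-through, and all of the per-column summing happens in the reducer, which skips the county token and keeps one running total per column name.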
Answer 1 (score: 0)
You did not provide the code you tried, so I assume you map by state and then sum up the Students count in the reducer.
The logic for summing Teachers and Classrooms is exactly the same. Instead of emitting a (key, value) pair whose value is an IntWritable, you can make the value a Text into which you append all of the sums. Alternatively, you can define your own Writable class that holds the three integers (students, teachers, classrooms).
The same applies to the value in the map phase: instead of an IntWritable, emit a Text (a concatenation of the fields you are interested in, which you then parse in the reduce phase), or the custom Writable class.
I assume you already have the logic for aggregating (summing) the counts, since it is exactly the same as for Students.
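As a rough illustration of the custom-Writable option, a minimal sketch could look like the following (the class and field names are placeholders of my own, not anything from your code):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

// Hypothetical value type carrying the three per-county counts.
public class CountyCountsWritable implements Writable {

    private int students;
    private int teachers;
    private int classrooms;

    public CountyCountsWritable() {
        // Hadoop needs a no-arg constructor to create instances via reflection
    }

    public CountyCountsWritable(int students, int teachers, int classrooms) {
        this.students = students;
        this.teachers = teachers;
        this.classrooms = classrooms;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // serialize the three counts
        out.writeInt(students);
        out.writeInt(teachers);
        out.writeInt(classrooms);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // deserialize in the same order they were written
        students = in.readInt();
        teachers = in.readInt();
        classrooms = in.readInt();
    }

    public int getStudents() { return students; }
    public int getTeachers() { return teachers; }
    public int getClassrooms() { return classrooms; }
}

The mapper would then emit one such object per input line, keyed by state, and the reducer would sum the three fields over all values for a state, exactly the way you already sum the Students column.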
Answer 2 (score: 0)
You can try this. I was able to get the output.
Mapper code:
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Exercisemapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // assumes a comma-delimited line: State,County,Students,Teacher,Classrooms
        String orig_val = value.toString();
        String[] orig_val1 = orig_val.split(",");
        String state_val = orig_val1[0];
        // keep only the three numeric columns; the county (index 1) is dropped
        String other_counts = orig_val1[2] + "," + orig_val1[3] + "," + orig_val1[4];
        context.write(new Text(state_val), new Text(other_counts));
    }
}
Reducer code:
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ExerciseReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    public void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {
        // one running total per column index (0 = Students, 1 = Teacher, 2 = Classrooms);
        // a TreeMap keeps the indexes sorted so the totals are written in column order
        Map<Integer, Integer> mymap = new TreeMap<Integer, Integer>();
        StringBuilder sb = new StringBuilder();
        int myval = 0;
        for (Text s : value) {
            String comma_values = s.toString();
            String[] comma_values_arr = comma_values.split(",");
            for (int i = 0; i < comma_values_arr.length; i++) {
                if (mymap.get(i) == null)
                    mymap.put(i, Integer.parseInt(comma_values_arr[i]));
                else {
                    myval = mymap.get(i) + Integer.parseInt(comma_values_arr[i]);
                    mymap.put(i, myval);
                }
            }
        }
        // join the totals with tabs and drop the trailing tab
        for (Integer finalval : mymap.values()) {
            sb.append(finalval.toString());
            sb.append("\t");
        }
        context.write(key, new Text(sb.toString().replaceAll("\t$", "")));
    }
}
Driver code:
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExerciseDriver {

    public static void main(String args[]) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: ExerciseDriver <input path> <output path>");
            System.exit(-1);
        }
        Job job = new Job();
        job.setJarByClass(ExerciseDriver.class);
        job.setJobName("ExerciseDriver");
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(Exercisemapper.class);
        job.setReducerClass(ExerciseReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1); // a single reducer so every state ends up in one output file
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
And in the driver code the number of reducers is set to 1.