所以我今天注意到我的map-reduce代码有奇怪的行为。花了3个小时试图弄清楚,仍然一无所获。
我正在尝试根据此数据集找到3个简单问题的答案:
我写了我的地图精简代码,并注意到对于每个代码,映射器正在为不同的行数运行。
对于Q1-3251行,Q2-3251行和Q3-33行!
我不明白为什么会这样。
public static void main(String[] args) throws Exception {
String inputFilePath = "./database.csv";
String outputFilePath = "./<BASED_ON_QUESTION>";
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "<BASED_ON_QUESTION>");
job.setJarByClass(YearlyAwards.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(inputFilePath));
try {
File f = new File(outputFilePath);
FileUtils.forceDelete(f);
} catch (Exception e) {
}
FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
第一季度
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
Integer count = 0;
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] quoteLessVal = value.toString().split("\"");
value = new Text(String.join("", quoteLessVal));
String[] values = value.toString().split(",");
String year = values[0];
String win = values[3];
count += 1;
if (!win.equals("")) {
context.write(new Text(year), new IntWritable(new Integer(win)));
}
}
@Override
protected void cleanup(Mapper<Object, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
super.cleanup(context);
System.out.println(count);
}
}
第二季度
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
Integer count = 0;
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
count += 1;
String[] quoteLessVal = value.toString().split("\"");
value = new Text(String.join("", quoteLessVal));
String[] values = value.toString().split(",");
String name = values[4];
String win = values[3];
if (!win.equals("")) {
context.write(new Text(name), new IntWritable(new Integer(win)));
}
}
@Override
protected void cleanup(Mapper<Object, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
super.cleanup(context);
System.out.println(count);
}
}
第三季度
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
Integer count = 0;
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
count += 1;
String[] quoteLessVal = value.toString().split("\"");
value = new Text(String.join("", quoteLessVal));
String[] values = value.toString().split(",");
String name = values[5];
String win = values[3];
count += 1;
if (!win.equals("")) {
context.write(new Text(name), new IntWritable(new Integer(win)));
}
}
@Override
protected void cleanup(Mapper<Object, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
super.cleanup(context);
System.out.println(count);
}
}
(所有Q的公平标准)
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
Integer count = 0;
for (IntWritable val : values) {
count += 1;
}
System.out.println(key + " > " + count);
context.write(key, new IntWritable(count));
}
}
我认为代码的执行由于某种原因而暂停,因为我在输出文件夹中没有任何输出文件(part-r-00000)!