我的代码对我来说似乎是对的,但在cmd上它没有给出正确的输出,请任何人帮我在代码中找到问题。它运行正常,但输出错误:
package test;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class temp {
public static class Mymapper extends Mapper<Object, Text, IntWritable,Text> {
public void map(Object key, Text value,Context context) throws IOException, InterruptedException{
int month=Integer.parseInt(value.toString().substring(16, 18));
IntWritable mon=new IntWritable(month);
String temp=value.toString().substring(26,30);
String t=null;
for(int i=0;i<temp.length();i++){
if(temp.charAt(i)==',')
break;
else
t=t+temp.charAt(i);
}
Text data=new Text(value.toString().substring(21, 25)+t);
context.write(mon, data);
}
}
public static class Myreducer extends Reducer<IntWritable,Text,IntWritable,IntWritable> {
public void reduce(IntWritable key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
String temp="";
int max=0;
for(Text t:values)
{
temp=t.toString();
if(temp.substring(0, 4)=="TMAX"){
if(Integer.parseInt(temp.substring(4,temp.length()))>max){
max=Integer.parseInt(temp.substring(4,temp.length()));
}
}
}
context.write(key,new IntWritable(max));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "temp");
job.setJarByClass(temp.class);
job.setMapperClass(Mymapper.class);
job.setReducerClass(Myreducer.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
输入文件:
USC00300379,19000121,TMAX,-78 ,,, 6,
USC00300379,19000131,TMAX,-133 ,,, 6,
USC00300379,19000111,TMAX,127 ,,, 6,
此代码的输出是:
12 0
13 0
11 0
答案 0 :(得分:1)
看起来你已经将索引换了一个(应该是子串(15,17)而不是子串(16,18)),但是我无法弄清楚为什么基于剪切的输入文件(也许你错过了什么?)
由于转移的指数,您会收到12,13,11而不是01,01,01。由于指数转移,您收到0作为最高温度&#34; TMAX&#34;没有出现在字符串的开头。
建议:
建议: 不要每次都在mapper和reducer中创建Text和IntWritable实例。你可以使它成为实例字段并使用set方法(它优化内存)
答案 1 :(得分:0)
以下代码将解决此问题。用以下代码替换Mymapper和Myreducer
public static class Mymapper extends Mapper<Object, Text, IntWritable,Text> {
public void map(Object key, Text value,Context context) throws IOException, InterruptedException{
String line = value.toString();
String[] elements = line.split(",");
int month=Integer.parseInt(elements[1].substring(4,6));
Text data=new Text(elements[3]);
context.write(new IntWritable(month), data);
}
}
public static class Myreducer extends Reducer<IntWritable,Text,IntWritable,IntWritable> {
public void reduce(IntWritable key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
int temp=0;
int max=0;
for(Text t:values)
{
String tmp = t.toString().trim();
if(!tmp.equals("")) {
temp=Integer.parseInt(tmp);
if (temp > max) {
max = temp;
}
}
}
context.write(key,new IntWritable(max));
}
}
答案 2 :(得分:0)
让我们先拍下记录:
USC00300379,19000121,TMAX,-78,,,6,
在此记录中19000121
:
1900 - Year
01 - Month
21 - Day
所以,预期的输出应该是:
21 0 (Since -78 < 0)
31 0 (Since -133 < 0)
11 127 (Since 127 > 0)
要实现此目的,您需要在代码中进行以下更改。
map()方法:
解析月份
变化:
int month=Integer.parseInt(value.toString().substring(16, 18));
要:
int month=Integer.parseInt(value.toString().substring(18,20));
字符串的初始化
变化:
String t=null;
要:
String t=""
reduce()方法:
字符串比较
变化:
if(temp.substring(0, 4)=="TMAX"){
要:
if(temp.substring(0, 4).equals("TMAX")){
通过这些更改,我得到了以下输出:
11 127
21 0
31 0