maxtempmapper.java类:
package com.hadoop.gskCodeBase.maxTemp;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MaxTempMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
private static final int MISSING=9999;
@Override
public void map(LongWritable kay,Text value,Context context) throws IOException,InterruptedException {
String line = value.toString();
String year = line.substring(15,19);
int airTemperature;
if(line.charAt(87)== '+'){
airTemperature=Integer.parseInt(line.substring(88, 92));
}else{
airTemperature=Integer.parseInt(line.substring(87, 92));
}
String quality=line.substring(92,93);
if(airTemperature !=MISSING && quality.matches("[01459]")){
context.write(new Text(year), new IntWritable(airTemperature));
}
}
}
maxtempreducer.java类:
package com.hadoop.gskCodeBase.maxTemp;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxTempReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
@Override
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException {
int maxValue = Integer.MIN_VALUE;
for(IntWritable value : values){
maxValue=Math.max(maxValue, value.get());
}
context.write(key, new IntWritable(maxValue));
}
}
maxtempdriver.java类:
package com.hadoop.gskCodeBase.maxTemp;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MaxTempDriver extends Configured implements Tool{
public int run(String[] args) throws Exception{
if(args.length !=2){
System.err.println("UsageTemperatureDriver <input path> <outputpath>");
System.exit(-1);
}
Job job = Job.getInstance();
job.setJarByClass(MaxTempDriver.class);
job.setJobName("Max Temperature");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.setMapperClass(MaxTempMapper.class);
job.setReducerClass(MaxTempReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0:1);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
MaxTempDriver driver = new MaxTempDriver();
int exitCode = ToolRunner.run(driver, args);
System.exit(exitCode);
}
}
我必须使用命令提示符在Windows上的单节点hadoop集群上执行上述三个类 有人可以帮助我如何在命令提示符(Windows)上执行这三个类?
答案 0 :(得分:0)
将所有java文件存档到单个.jar文件中。然后像往常一样运行它。在Windows中,通过Cygwin终端运行Hadoop更容易。您可以通过以下命令执行作业:
hadoop jar <path to .jar> <path to input folder in hdfs> <path to output folder in hdfs>
例如:
hadoop jar wordcount.jar /input /output
<强> -UPDATE - 强>
您应该将您的驱动程序类分配给job.setJarByClass()。在这种情况下,它将是您的MaxTempDriver.class
在eclipse中,您可以通过右键单击源文件夹&gt;创建一个jar文件。出口&gt; JAR文件。从那里你可以按照步骤。您也可以在此过程中设置主类。
希望这能回答你的问题。