我在Java中远程提交MapReduce
作业有问题。我有一个CDH群集,我将mapred-site.xml
,yarn-site.xml
,core-site.xml
和hdfs-site.xml
复制到了我的 IDEA 项目中。但是我发现它总是在我的 Windows PC 上生成 .staging
目录;由于 localhost
上并没有任何 Hadoop
环境,因此我认为该作业实际上被提交到了 localhost
(本地 LocalJobRunner),而不是远程集群。
package com.mule.cell.basic;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WordCount {

    /**
     * Submits a word-count MapReduce job to the YARN cluster described by the
     * {@code core/hdfs/yarn/mapred-site.xml} client configs on the classpath.
     *
     * @param args unused
     * @throws IOException            on HDFS / job-submission I/O failure
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        final String INPUT_PATH = "/tmp/test.txt";
        final String OUTPUT_PATH = "/tmp/output";

        // Act as the "hdfs" user for HDFS and YARN access.
        System.setProperty("HADOOP_USER_NAME", "hdfs");

        ClassLoader classloader = Thread.currentThread().getContextClassLoader();
        Configuration conf = new Configuration();

        // Load the cluster client configs. getResource() returns null when a file
        // is missing from the classpath, and Configuration would then silently fall
        // back to its bundled defaults (mapreduce.framework.name = "local"), which
        // runs the job in the LocalJobRunner on this PC instead of submitting to
        // YARN — the ".staging directory on my Windows PC" symptom. Fail fast.
        // core-site first so later site files can override it.
        for (String resource : new String[] {
                "core-site.xml", "hdfs-site.xml", "yarn-site.xml", "mapred-site.xml"}) {
            java.net.URL url = classloader.getResource(resource);
            if (url == null) {
                throw new IllegalStateException(resource
                        + " not found on classpath; job would fall back to the local job runner");
            }
            conf.addResource(url);
        }

        // Required when submitting from a Windows client to a Linux cluster;
        // otherwise container launch commands are built with Windows path/env
        // syntax and fail on the NodeManagers.
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(conf, "yuri");
        job.setMapperClass(com.mule.cell.basic.WordCountMap.class);
        // The reducer doubles as the combiner: summing counts is associative
        // and commutative, so partial aggregation on the map side is safe.
        job.setCombinerClass(com.mule.cell.basic.WordCountReduce.class);
        job.setReducerClass(com.mule.cell.basic.WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Ship the job jar explicitly — this client runs outside the cluster.
        // Fixed: the original path contained a stray space ("SNAPSHOT. jar").
        job.setJar("C:\\Users\\aoding\\workspace\\alextinng\\cell\\mapreduce\\target\\mapreduce-1.0-SNAPSHOT.jar");
        job.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        // NOTE(review): OUTPUT_PATH must not already exist on HDFS, or submission
        // fails with FileAlreadyExistsException — delete it before re-running.
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
但如果我像下面这样显式配置 Configuration
对象,则该作业会被提交到远程集群并成功完成。
package com.mule.cell.basic;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WordCount {

    /**
     * Submits a word-count MapReduce job to a remote YARN cluster using
     * explicitly set client properties instead of *-site.xml files.
     *
     * @param args unused
     * @throws IOException            on HDFS / job-submission I/O failure
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        final String INPUT_PATH = "/tmp/test.txt";
        final String OUTPUT_PATH = "/tmp/output";

        // Act as the "hdfs" user for HDFS and YARN access.
        System.setProperty("HADOOP_USER_NAME", "hdfs");

        Configuration conf = new Configuration();
        // Point the client at the remote ResourceManager and select YARN
        // (without mapreduce.framework.name=yarn the job runs in-process
        // via the LocalJobRunner).
        conf.set("yarn.resourcemanager.address", "xxxxx:8032");
        conf.set("mapreduce.framework.name", "yarn");
        // "fs.default.name" is deprecated; "fs.defaultFS" is the current key
        // (Hadoop still honors the old name via its deprecation mapping).
        conf.set("fs.defaultFS", "hdfs://xxxxxx:8020");
        // Required when submitting from a Windows client to a Linux cluster.
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(conf, "yuri");
        job.setMapperClass(com.mule.cell.basic.WordCountMap.class);
        // The reducer doubles as the combiner: summing counts is associative
        // and commutative, so partial aggregation on the map side is safe.
        job.setCombinerClass(com.mule.cell.basic.WordCountReduce.class);
        job.setReducerClass(com.mule.cell.basic.WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Ship the job jar explicitly — this client runs outside the cluster.
        // Fixed: the original path contained a stray space ("SNAPSHOT. jar").
        job.setJar("C:\\Users\\aoding\\workspace\\alextinng\\cell\\mapreduce\\target\\mapreduce-1.0-SNAPSHOT.jar");
        job.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        // NOTE(review): OUTPUT_PATH must not already exist on HDFS, or submission
        // fails with FileAlreadyExistsException — delete it before re-running.
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}