下面是我的代码。显然,我已经用 job.setJarByClass(KnnDriver.class) 设置了 jar。
有人可以帮我调试吗?
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class KnnDriver extends Configured implements Tool {
    /*
     * Expected arguments:
     *   args[0] = N (number of nearest neighbors)
     *   args[1] = test.csv  (shipped to tasks via the distributed cache)
     *   args[2] = train.csv (job input path)
     *   args[3] = output directory
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new KnnDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        // Validate the argument count BEFORE touching args[0]/args[1].
        // The original code checked only after using them, so a short
        // argument list would throw ArrayIndexOutOfBoundsException
        // instead of printing the usage hint.
        if (args.length != 4) {
            System.err.println("Usage: KnnDriver <N> <test.csv> <train input> <output dir>");
            // Return the error code rather than calling System.exit() inside
            // run(); ToolRunner propagates it to main(), same process exit code.
            return 2;
        }

        Configuration conf = getConf();
        conf.set("N", args[0]); // K for the KNN computation; read back in the tasks

        Job job = Job.getInstance(conf, "K-Nearest-Neighbor mapreduce");
        job.setJarByClass(KnnDriver.class);

        // Ship the test set to every task via the distributed cache.
        // NOTE(review): the reported NullPointerException originates in
        // KnnMapper.setup (StringTokenizer constructed from a null
        // readLine() result). The job log shows the cache file localizes
        // fine, so the mapper must guard against null/EOF lines when it
        // reads this cached file — fix belongs in KnnMapper, not here.
        job.addCacheFile(new URI(args[1]));

        job.setMapperClass(KnnMapper.class);
        job.setReducerClass(KnnReducer.class);

        // Map output value is DistClassPair; setMapOutputKeyClass is not
        // called, so the map output key defaults to the job output key class
        // (LongWritable) — confirm this matches the key type KnnMapper emits.
        job.setOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(DistClassPair.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[2]));
        Path outputPath = new Path(args[3]);
        // Delete any pre-existing output directory so re-runs do not fail
        // with FileAlreadyExistsException.
        FileSystem.get(conf).delete(outputPath, true);
        FileOutputFormat.setOutputPath(job, outputPath);

        return job.waitForCompletion(true) ? 0 : -1;
    }
}
此外,这里是堆栈信息
2016-02-25 04:00:31,240 WARN [main] util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(62)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2016-02-25 04:00:31,449 INFO [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1173)) - session.id is deprecated. Instead, use dfs.metrics.session-id
2016-02-25 04:00:31,450 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(76)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2016-02-25 04:00:31,603 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2016-02-25 04:00:31,650 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(283)) - Total input paths to process : 1
2016-02-25 04:00:31,697 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(198)) - number of splits:1
2016-02-25 04:00:31,778 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(287)) - Submitting tokens for job: job_local225811898_0001
2016-02-25 04:00:32,014 INFO [main] mapred.LocalDistributedCacheManager (LocalDistributedCacheManager.java:symlink(201)) - Creating symlink: /tmp/hadoop-xupengtong/mapred/local/1456390831864/test.csv <- /Volumes/TONY/Big_Data/KnnMapReduce/test.csv
2016-02-25 04:00:32,019 INFO [main] mapred.LocalDistributedCacheManager (LocalDistributedCacheManager.java:setup(165)) - Localized file:/Volumes/TONY/Big_Data/KnnMapReduce/data/test.csv as file:/tmp/hadoop-xupengtong/mapred/local/1456390831864/test.csv
2016-02-25 04:00:32,058 INFO [main] mapreduce.Job (Job.java:submit(1294)) - The url to track the job: http://localhost:8080/
2016-02-25 04:00:32,059 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1339)) - Running job: job_local225811898_0001
2016-02-25 04:00:32,060 INFO [Thread-21] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2016-02-25 04:00:32,064 INFO [Thread-21] output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-02-25 04:00:32,066 INFO [Thread-21] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2016-02-25 04:00:32,094 INFO [Thread-21] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2016-02-25 04:00:32,095 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local225811898_0001_m_000000_0
2016-02-25 04:00:32,115 INFO [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-02-25 04:00:32,121 INFO [LocalJobRunner Map Task Executor #0] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2016-02-25 04:00:32,121 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(612)) - Using ResourceCalculatorProcessTree : null
2016-02-25 04:00:32,124 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: file:/Volumes/TONY/Big_Data/KnnMapReduce/data/train.csv:0+4447014
2016-02-25 04:00:32,229 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2016-02-25 04:00:32,229 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2016-02-25 04:00:32,229 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2016-02-25 04:00:32,229 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2016-02-25 04:00:32,229 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2016-02-25 04:00:32,233 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2016-02-25 04:00:32,352 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2016-02-25 04:00:32,360 INFO [Thread-21] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2016-02-25 04:00:32,361 WARN [Thread-21] mapred.LocalJobRunner (LocalJobRunner.java:run(560)) - job_local225811898_0001
java.lang.Exception: java.lang.NullPointerException
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.NullPointerException
at java.util.StringTokenizer.<init>(StringTokenizer.java:199)
at java.util.StringTokenizer.<init>(StringTokenizer.java:221)
at KnnMapper.setup(KnnMapper.java:67)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:143)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
2016-02-25 04:00:33,061 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local225811898_0001 running in uber mode : false
2016-02-25 04:00:33,062 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - map 0% reduce 0%
2016-02-25 04:00:33,066 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1380)) - Job job_local225811898_0001 failed with state FAILED due to: NA
2016-02-25 04:00:33,070 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 0
答案 0(得分:0)
确保初始化 map 和 reduce 中使用的所有变量。我遇到过同样的问题。