I am trying to figure out why the Java code below fails when I run it on Hadoop.
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class PageStat implements Tool {
    private Configuration conf;

    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());
        String jobName = "Page visit statistics MR";
        job.setJobName(jobName);
        job.setJarByClass(PageStat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(PageStat.PageStatMapper.class);
        job.setReducerClass(PageStat.PageStatReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));
        int status = job.waitForCompletion(true) ? 0 : 1;
        return status;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new PageStat(), args);
        System.exit(exitCode);
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public Configuration getConf() {
        return conf;
    }
    public static class PageStatMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text keyHolder = new Text();
        private IntWritable valueHolder = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] items = value.toString().split(",");
            if (items.length == 3) {
                String url = items[1];
                keyHolder.set(url);
                Integer duration = Integer.parseInt(items[2]);
                valueHolder.set(duration);
                context.write(keyHolder, valueHolder);
            } else {
                context.getCounter("Error", "invalidData").increment(1);
            }
        }
    }
    public static class PageStatReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private Text keyHolder = new Text();
        private IntWritable valueHolder = new IntWritable();
        private String statType;
        private int count;
        private int totalTime;
        private int avTime;

        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            statType = conf.get("page.stat");
        }

        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            count = 0;
            totalTime = 0;
            for (IntWritable value : values) {
                ++count;
                totalTime += value.get();
            }
            avTime = totalTime / count;
            keyHolder.set(key);
            if (statType.equals("average")) {
                valueHolder.set(avTime);
            } else {
                valueHolder.set(totalTime);
            }
            context.write(keyHolder, valueHolder);
        }
    }
}
The error is:
c:\hadoop-training\tutorial02-jobtracker>hadoop jar PageStat.jar PageStat jobtracker/input/visit_5000000.txt jobtracker/output
13/07/29 11:24:50 INFO input.FileInputFormat: Total input paths to process : 1
log4j:ERROR Failed to rename [c:\Hadoop\hadoop-1.1.0-SNAPSHOT\logs/hadoop.log] to [c:\Hadoop\hadoop-1.1.0-SNAPSHOT\logs/hadoop.log.2013-07-26].
13/07/29 11:24:51 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/07/29 11:24:51 WARN snappy.LoadSnappy: Snappy native library not loaded
13/07/29 11:24:54 INFO mapred.JobClient: Running job: job_201307261340_0001
13/07/29 11:24:55 INFO mapred.JobClient:  map 0% reduce 0%
13/07/29 11:25:24 INFO mapred.JobClient:  map 1% reduce 0%
13/07/29 11:25:27 INFO mapred.JobClient:  map 6% reduce 0%
13/07/29 11:25:30 INFO mapred.JobClient:  map 14% reduce 0%
13/07/29 11:25:35 INFO mapred.JobClient:  map 22% reduce 0%
13/07/29 11:25:38 INFO mapred.JobClient:  map 31% reduce 0%
13/07/29 11:25:41 INFO mapred.JobClient:  map 35% reduce 0%
13/07/29 11:25:44 INFO mapred.JobClient:  map 44% reduce 0%
13/07/29 11:25:47 INFO mapred.JobClient:  map 50% reduce 0%
13/07/29 11:26:03 INFO mapred.JobClient:  map 60% reduce 0%
13/07/29 11:26:06 INFO mapred.JobClient:  map 64% reduce 0%
13/07/29 11:26:09 INFO mapred.JobClient:  map 69% reduce 0%
13/07/29 11:26:12 INFO mapred.JobClient:  map 76% reduce 0%
13/07/29 11:26:15 INFO mapred.JobClient:  map 81% reduce 0%
13/07/29 11:26:18 INFO mapred.JobClient:  map 85% reduce 0%
13/07/29 11:26:21 INFO mapred.JobClient:  map 87% reduce 0%
13/07/29 11:26:24 INFO mapred.JobClient:  map 92% reduce 0%
13/07/29 11:26:27 INFO mapred.JobClient:  map 94% reduce 0%
13/07/29 11:26:30 INFO mapred.JobClient:  map 96% reduce 0%
13/07/29 11:26:33 INFO mapred.JobClient:  map 97% reduce 0%
13/07/29 11:26:37 INFO mapred.JobClient:  map 99% reduce 8%
13/07/29 11:26:40 INFO mapred.JobClient:  map 100% reduce 8%
13/07/29 11:26:46 INFO mapred.JobClient:  map 100% reduce 25%
13/07/29 11:26:54 INFO mapred.JobClient: Task Id : attempt_201307261340_0001_r_000000_0, Status : FAILED
java.lang.NullPointerException
        at PageStat$PageStatReducer.reduce(PageStat.java:120)
        at PageStat$PageStatReducer.reduce(PageStat.java:96)
        at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)
        at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:651)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1135)
        at org.apache.hadoop.mapred.Child.main(Child.java:265)

13/07/29 11:26:56 INFO mapred.JobClient:  map 100% reduce 0%
13/07/29 11:27:05 INFO mapred.JobClient:  map 100% reduce 8%
13/07/29 11:27:08 INFO mapred.JobClient:  map 100% reduce 33%
13/07/29 11:27:10 INFO mapred.JobClient: Task Id : attempt_201307261340_0001_r_000000_1, Status : FAILED
java.lang.NullPointerException
        at PageStat$PageStatReducer.reduce(PageStat.java:120)
        at PageStat$PageStatReducer.reduce(PageStat.java:96)
        at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)
        at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:651)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1135)
        at org.apache.hadoop.mapred.Child.main(Child.java:265)

13/07/29 11:27:11 INFO mapred.JobClient:  map 100% reduce 0%
13/07/29 11:27:20 INFO mapred.JobClient:  map 100% reduce 8%
13/07/29 11:27:23 INFO mapred.JobClient:  map 100% reduce 25%
13/07/29 11:27:25 INFO mapred.JobClient: Task Id : attempt_201307261340_0001_r_000000_2, Status : FAILED
java.lang.NullPointerException
        at PageStat$PageStatReducer.reduce(PageStat.java:120)
        at PageStat$PageStatReducer.reduce(PageStat.java:96)
        at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)
        at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:651)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1135)
        at org.apache.hadoop.mapred.Child.main(Child.java:265)

13/07/29 11:27:26 INFO mapred.JobClient:  map 100% reduce 0%
13/07/29 11:27:38 INFO mapred.JobClient:  map 100% reduce 25%
13/07/29 11:27:41 INFO mapred.JobClient:  map 100% reduce 0%
13/07/29 11:27:43 INFO mapred.JobClient: Job complete: job_201307261340_0001
13/07/29 11:27:43 INFO mapred.JobClient: Counters: 24
13/07/29 11:27:43 INFO mapred.JobClient:   Job Counters
13/07/29 11:27:43 INFO mapred.JobClient:     Launched reduce tasks=4
13/07/29 11:27:43 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=179086
13/07/29 11:27:43 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
13/07/29 11:27:43 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
13/07/29 11:27:43 INFO mapred.JobClient:     Launched map tasks=4
13/07/29 11:27:43 INFO mapred.JobClient:     Data-local map tasks=4
13/07/29 11:27:43 INFO mapred.JobClient:     Failed reduce tasks=1
13/07/29 11:27:43 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=106513
13/07/29 11:27:43 INFO mapred.JobClient:   FileSystemCounters
13/07/29 11:27:43 INFO mapred.JobClient:     FILE_BYTES_READ=179504086
13/07/29 11:27:43 INFO mapred.JobClient:     HDFS_BYTES_READ=254931072
13/07/29 11:27:43 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=359099432
13/07/29 11:27:43 INFO mapred.JobClient:   File Input Format Counters
13/07/29 11:27:43 INFO mapred.JobClient:     Bytes Read=254930544
13/07/29 11:27:43 INFO mapred.JobClient:   Map-Reduce Framework
13/07/29 11:27:43 INFO mapred.JobClient:     Map output materialized bytes=179499502
13/07/29 11:27:43 INFO mapred.JobClient:     Combine output records=0
13/07/29 11:27:43 INFO mapred.JobClient:     Map input records=5000000
13/07/29 11:27:43 INFO mapred.JobClient:     Physical memory (bytes) snapshot=851607552
13/07/29 11:27:43 INFO mapred.JobClient:     Spilled Records=10000000
13/07/29 11:27:43 INFO mapred.JobClient:     Map output bytes=169499478
13/07/29 11:27:43 INFO mapred.JobClient:     CPU time spent (ms)=81308
13/07/29 11:27:43 INFO mapred.JobClient:     Total committed heap usage (bytes)=746323968
13/07/29 11:27:43 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=988401664
13/07/29 11:27:43 INFO mapred.JobClient:     Combine input records=0
13/07/29 11:27:43 INFO mapred.JobClient:     Map output records=5000000
13/07/29 11:27:43 INFO mapred.JobClient:     SPLIT_RAW_BYTES=528
Thanks!!!
Answer 0 (score: 1)
I had a similar problem; you need to run the job with the -D flag:
-Dpage.stat=total
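For example, the invocation from the log above would become something like the following (a sketch reusing the same jar and paths; ToolRunner's GenericOptionsParser consumes the -D option before the remaining arguments reach run()):

hadoop jar PageStat.jar PageStat -Dpage.stat=total jobtracker/input/visit_5000000.txt jobtracker/output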
You may also see the error:
log4j:WARN No appenders could be found for logger (org.apache.hadoop.hdfs.DFSClient).
log4j:WARN Please initialize the log4j system properly.
This isn't the complete answer; I'm still getting to the bottom of it myself.
Answer 1 (score: 0)
The line numbers in the stack trace don't seem to line up with the source code posted. Has the code changed since this run?
The NullPointerException is probably occurring on the if (statType ...) line. I don't see anything that sets "page.stat" in the configuration, either hard-coded in the run method or passed as a parameter at job submission. That would leave the statType member initialized to null.
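As a minimal sketch of those two options (assuming "total" is an acceptable default value for page.stat; the property name is the one read in the reducer's setup method), you could either default the property in the driver or read it with a fallback in the reducer:

// Option 1 (in PageStat.run): give page.stat a default before building the Job,
// so the reducer never sees a null value when -Dpage.stat is not supplied.
Configuration conf = getConf();
if (conf.get("page.stat") == null) {
    conf.set("page.stat", "total");  // assumed default; "average" is the other value the reducer checks
}
Job job = new Job(conf);

// Option 2 (in PageStatReducer.setup): read the property with a default value,
// which also avoids the NullPointerException in reduce().
statType = context.getConfiguration().get("page.stat", "total");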