I am writing a mapper class that should read files from an HDFS location and create one record (using a custom class) per file. The mapper class code:
package com.nayan.bigdata.hadoop;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
/**
* @file : FileToRecordMapper.java
* @author : nayan
* @version : 1.0.0
* @date : 27-Aug-2013 12:13:44 PM
* @desc : Mapper class to read files and convert it into records.
*/
public class FileToRecordMapper extends
        Mapper<LongWritable, Text, Text, RecordWritable> {

    private static Logger logger = Logger.getLogger(FileToRecordMapper.class);
    List<Path> allPaths;
    FileSystem fs;

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        logger.info("Inside cleanup method.");
    }

    @Override
    protected void map(LongWritable key, Text value,
            Context context)
            throws IOException, InterruptedException {
        logger.info("Starting map method of FileToRecordMapper class.");
        for (Path path : allPaths) {
            FSDataInputStream in = this.fs.open(path);
            Text filePath = new Text(path.getName());
            Text directoryPath = new Text(path.getParent().getName());
            Text filename = new Text(path.getName().substring(path.getName().lastIndexOf('/') + 1,
                    path.getName().length()));
            byte[] b = new byte[1024];
            StringBuilder contentBuilder = new StringBuilder();
            while ((in.read(b)) > 0) {
                contentBuilder.append(new String(b, "UTF-8"));
            }
            Text fileContent = new Text(contentBuilder.toString());
            in.close();
            RecordWritable record = new RecordWritable(filePath, filename,
                    fileContent, new LongWritable(System.currentTimeMillis()));
            logger.info("Record Created : " + record);
            context.write(directoryPath, record);
            logger.info("map method of FileToRecordMapper class completed.");
        }
    }

    @Override
    public void run(Context context)
            throws IOException, InterruptedException {
        logger.info("Inside run method.");
    }

    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        logger.info("Inside setup method.");
        try {
            logger.info("Starting configure method of FileToRecordMapper class.");
            fs = FileSystem.get(context.getConfiguration());
            Path path = new Path(context.getConfiguration().get("mapred.input.dir"));
            allPaths = getAllPaths(path);
        } catch (IOException e) {
            logger.error("Error while fetching paths.", e);
        }
        logger.info("Paths : " + ((null != allPaths) ? allPaths : "null"));
        logger.info("configure method of FileToRecordMapper class completed.");
        super.setup(context);
    }

    private List<Path> getAllPaths(Path path) throws IOException {
        ArrayList<Path> paths = new ArrayList<Path>();
        getAllPaths(path, paths);
        return paths;
    }

    private void getAllPaths(Path path, List<Path> paths) throws IOException {
        try {
            if (!this.fs.isFile(path)) {
                for (FileStatus s : fs.listStatus(path)) {
                    getAllPaths(s.getPath(), paths);
                }
            } else {
                paths.add(path);
            }
        } catch (IOException e) {
            logger.error("File System Exception.", e);
            throw e;
        }
    }
}
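As far as I understand it, the stock org.apache.hadoop.mapreduce.Mapper drives setup(), map() and cleanup() from its run() method. For reference, the default loop looks roughly like this (paraphrased from my reading of the 0.20.x Mapper source, so treat it as a sketch rather than verbatim):

    // Default Mapper.run() loop in the new API (paraphrased for reference)
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        // feed every (key, value) pair of the split to map()
        while (context.nextKeyValue()) {
            map(context.getCurrentKey(), context.getCurrentValue(), context);
        }
        cleanup(context);
    }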
The record class is:
package com.nayan.bigdata.hadoop;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
/**
* @file : RecordWritable.java
* @author : nayan
* @version : 1.0.0
* @date : 21-Aug-2013 1:53:12 PM
* @desc : Class to create a record in Accumulo
*/
public class RecordWritable implements Writable {

    private Text filePath;
    private Text fileName;
    private Text fileContent;
    private LongWritable timeStamp;

    public RecordWritable() {
        this.filePath = new Text();
        this.fileName = new Text();
        this.fileContent = new Text();
        this.timeStamp = new LongWritable(System.currentTimeMillis());
    }

    /**
     * @param filePath
     * @param fileName
     * @param fileContent
     * @param timeStamp
     */
    public RecordWritable(Text filePath, Text fileName, Text fileContent,
            LongWritable timeStamp) {
        this.filePath = filePath;
        this.fileName = fileName;
        this.fileContent = fileContent;
        this.timeStamp = timeStamp;
    }

    public Text getFilePath() {
        return filePath;
    }

    public void setFilePath(Text filePath) {
        this.filePath = filePath;
    }

    public Text getFileName() {
        return fileName;
    }

    public void setFileName(Text fileName) {
        this.fileName = fileName;
    }

    public Text getFileContent() {
        return fileContent;
    }

    public void setFileContent(Text fileContent) {
        this.fileContent = fileContent;
    }

    public LongWritable getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(LongWritable timeStamp) {
        this.timeStamp = timeStamp;
    }

    @Override
    public int hashCode() {
        return this.filePath.getLength() + this.fileName.getLength() + this.fileContent.getLength();
    }

    @Override
    public boolean equals(Object obj) {
        if (obj instanceof RecordWritable) {
            RecordWritable otherRecord = (RecordWritable) obj;
            return this.filePath.equals(otherRecord.filePath) && this.fileName.equals(otherRecord.fileName);
        }
        return false;
    }

    @Override
    public String toString() {
        StringBuilder recordDesc = new StringBuilder("Record Details ::\t");
        recordDesc.append("File Path + ").append(this.filePath).append("\t");
        recordDesc.append("File Name + ").append(this.fileName).append("\t");
        recordDesc.append("File Content Length + ").append(this.fileContent.getLength()).append("\t");
        recordDesc.append("File TimeStamp + ").append(this.timeStamp).append("\t");
        return recordDesc.toString();
    }

    @Override
    public void readFields(DataInput din) throws IOException {
        filePath.readFields(din);
        fileName.readFields(din);
        fileContent.readFields(din);
        timeStamp.readFields(din);
    }

    @Override
    public void write(DataOutput dout) throws IOException {
        filePath.write(dout);
        fileName.write(dout);
        fileContent.write(dout);
        timeStamp.write(dout);
    }
}
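To rule out serialization problems, I also sketched a small round-trip check for RecordWritable. The class name and field values below are made up for illustration and are not part of the project:

    package com.nayan.bigdata.hadoop;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;

    // Hypothetical round-trip check: write() a record to a byte array,
    // readFields() it back, and compare the printed field values.
    public class RecordWritableRoundTrip {
        public static void main(String[] args) throws IOException {
            RecordWritable original = new RecordWritable(new Text("/usr/hadoop/sample"),
                    new Text("sample.txt"), new Text("file content"), new LongWritable(123L));

            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            original.write(new DataOutputStream(bytes));

            RecordWritable copy = new RecordWritable();
            copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

            System.out.println(original);
            System.out.println(copy); // expected to print the same details
        }
    }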
The Job Runner class is:
package com.nayan.bigdata.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
* @file : HadoopJobRunner.java
* @author : nayan
* @version : 1.0.0
* @date : 22-Aug-2013 12:45:15 PM
* @desc : Class to run Hadoop MR job.
*/
public class HadoopJobRunner extends Configured implements Tool {

    private static Logger logger = Logger.getLogger(HadoopJobRunner.class);

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new HadoopJobRunner(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] arg0) throws Exception {
        logger.info("Initiating Hadoop Job.");
        Configuration conf = new Configuration(true);
        conf.setStrings("mapred.output.dir", arg0[1]);
        conf.setStrings("mapred.input.dir", arg0[0]);

        Job mrJob = new Job(conf, "FileRecordsJob");
        mrJob.setJarByClass(HadoopJobRunner.class);

        mrJob.setMapOutputKeyClass(Text.class);
        mrJob.setMapOutputValueClass(RecordWritable.class);
        mrJob.setMapperClass(FileToRecordMapper.class);

        mrJob.setReducerClass(FileRecordsReducer.class);
        mrJob.setOutputKeyClass(Text.class);
        mrJob.setOutputValueClass(RecordWritable.class);

        logger.info("MapRed Job Configuration : " + mrJob.getConfiguration().toString());
        logger.info("Input Path : " + mrJob.getConfiguration().get("mapred.input.dir"));
        return mrJob.waitForCompletion(true) ? 0 : 1;
    }
}
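For comparison, the examples I have seen usually set the input/output locations through FileInputFormat/FileOutputFormat on the Job rather than writing mapred.input.dir and mapred.output.dir onto the Configuration directly. A sketch of that variant (not the code I actually ran; the class name is mine) would be:

    package com.nayan.bigdata.hadoop;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    // Hypothetical variant of the job driver that wires the paths through
    // FileInputFormat/FileOutputFormat instead of raw Configuration keys.
    public class HadoopJobRunnerAlt {
        public int run(String[] args) throws Exception {
            Job mrJob = new Job(new Configuration(true), "FileRecordsJob");
            mrJob.setJarByClass(HadoopJobRunnerAlt.class);
            mrJob.setMapperClass(FileToRecordMapper.class);
            mrJob.setReducerClass(FileRecordsReducer.class);
            mrJob.setMapOutputKeyClass(Text.class);
            mrJob.setMapOutputValueClass(RecordWritable.class);
            mrJob.setOutputKeyClass(Text.class);
            mrJob.setOutputValueClass(RecordWritable.class);
            FileInputFormat.addInputPath(mrJob, new Path(args[0]));   // input dir
            FileOutputFormat.setOutputPath(mrJob, new Path(args[1])); // output dir
            return mrJob.waitForCompletion(true) ? 0 : 1;
        }
    }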
The project's POM file is:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.nayan.bigdata</groupId>
    <artifactId>BigDataOperations</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>BigDataOperations</name>
    <properties>
        <hadoop.version>0.20.2</hadoop.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.hamcrest</groupId>
            <artifactId>hamcrest-all</artifactId>
            <version>1.3</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-jar-plugin</artifactId>
                    <configuration>
                        <archive>
                            <manifest>
                                <mainClass>com.nayan.bigdata.hadoop.HadoopJobRunner</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
When I run the jar, I get the following output on the console:
[root@koversevm tmp]# hadoop jar BigDataOperations-1.0-SNAPSHOT.jar /usr/hadoop/sample /usr/hadoop/jobout
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: Initiating Hadoop Job.
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: Setting the input/output path.
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: MapRed Job Configuration : Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: Input Path : null
13/08/28 18:33:58 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/08/28 18:33:58 INFO input.FileInputFormat: Total input paths to process : 8
13/08/28 18:33:58 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
13/08/28 18:33:58 WARN snappy.LoadSnappy: Snappy native library not loaded
13/08/28 18:33:58 INFO mapred.JobClient: Running job: job_201308281800_0008
13/08/28 18:33:59 INFO mapred.JobClient: map 0% reduce 0%
13/08/28 18:34:06 INFO mapred.JobClient: map 25% reduce 0%
13/08/28 18:34:13 INFO mapred.JobClient: map 50% reduce 0%
13/08/28 18:34:17 INFO mapred.JobClient: map 75% reduce 0%
13/08/28 18:34:23 INFO mapred.JobClient: map 100% reduce 0%
13/08/28 18:34:24 INFO mapred.JobClient: map 100% reduce 33%
13/08/28 18:34:26 INFO mapred.JobClient: map 100% reduce 100%
13/08/28 18:34:27 INFO mapred.JobClient: Job complete: job_201308281800_0008
13/08/28 18:34:27 INFO mapred.JobClient: Counters: 25
13/08/28 18:34:27 INFO mapred.JobClient: Job Counters
13/08/28 18:34:27 INFO mapred.JobClient: Launched reduce tasks=1
13/08/28 18:34:27 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=44066
13/08/28 18:34:27 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
13/08/28 18:34:27 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
13/08/28 18:34:27 INFO mapred.JobClient: Launched map tasks=8
13/08/28 18:34:27 INFO mapred.JobClient: Data-local map tasks=8
13/08/28 18:34:27 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=19034
13/08/28 18:34:27 INFO mapred.JobClient: FileSystemCounters
13/08/28 18:34:27 INFO mapred.JobClient: FILE_BYTES_READ=6
13/08/28 18:34:27 INFO mapred.JobClient: HDFS_BYTES_READ=1011
13/08/28 18:34:27 INFO mapred.JobClient: FILE_BYTES_WRITTEN=549207
13/08/28 18:34:27 INFO mapred.JobClient: Map-Reduce Framework
13/08/28 18:34:27 INFO mapred.JobClient: Map input records=0
13/08/28 18:34:27 INFO mapred.JobClient: Reduce shuffle bytes=48
13/08/28 18:34:27 INFO mapred.JobClient: Spilled Records=0
13/08/28 18:34:27 INFO mapred.JobClient: Map output bytes=0
13/08/28 18:34:27 INFO mapred.JobClient: CPU time spent (ms)=3030
13/08/28 18:34:27 INFO mapred.JobClient: Total committed heap usage (bytes)=1473413120
13/08/28 18:34:27 INFO mapred.JobClient: Combine input records=0
13/08/28 18:34:27 INFO mapred.JobClient: SPLIT_RAW_BYTES=1011
13/08/28 18:34:27 INFO mapred.JobClient: Reduce input records=0
13/08/28 18:34:27 INFO mapred.JobClient: Reduce input groups=0
13/08/28 18:34:27 INFO mapred.JobClient: Combine output records=0
13/08/28 18:34:27 INFO mapred.JobClient: Physical memory (bytes) snapshot=1607675904
13/08/28 18:34:27 INFO mapred.JobClient: Reduce output records=0
13/08/28 18:34:27 INFO mapred.JobClient: Virtual memory (bytes) snapshot=23948111872
13/08/28 18:34:27 INFO mapred.JobClient: Map output records=0
But when I look at the task logs, I find the following exceptions:
Task Logs: 'attempt_201308281800_0008_m_000000_0'
stdout logs
2013-08-28 18:34:01 DEBUG Child:82 - Child starting
2013-08-28 18:34:02 DEBUG Groups:136 - Creating new Groups object
2013-08-28 18:34:02 DEBUG Groups:59 - Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping; cacheTimeout=300000
2013-08-28 18:34:02 DEBUG UserGroupInformation:193 - hadoop login
2013-08-28 18:34:02 DEBUG UserGroupInformation:142 - hadoop login commit
2013-08-28 18:34:02 DEBUG UserGroupInformation:172 - using local user:UnixPrincipal: mapred
2013-08-28 18:34:02 DEBUG UserGroupInformation:664 - UGI loginUser:mapred (auth:SIMPLE)
2013-08-28 18:34:02 DEBUG FileSystem:1598 - Creating filesystem for file:///var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/jobToken
2013-08-28 18:34:02 DEBUG TokenCache:182 - Task: Loaded jobTokenFile from: /var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/jobToken; num of sec keys = 0 Number of tokens 1
2013-08-28 18:34:02 DEBUG Child:106 - loading token. # keys =0; from file=/var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/jobToken
2013-08-28 18:34:02 DEBUG UserGroupInformation:1300 - PriviledgedAction as:job_201308281800_0008 (auth:SIMPLE) from:org.apache.hadoop.mapred.Child.main(Child.java:121)
2013-08-28 18:34:02 DEBUG Client:256 - The ping interval is60000ms.
2013-08-28 18:34:02 DEBUG Client:299 - Use SIMPLE authentication for protocol TaskUmbilicalProtocol
2013-08-28 18:34:02 DEBUG Client:569 - Connecting to /127.0.0.1:50925
2013-08-28 18:34:02 DEBUG Client:762 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008: starting, having connections 1
2013-08-28 18:34:02 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #0
2013-08-28 18:34:02 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #0
2013-08-28 18:34:02 DEBUG RPC:230 - Call: getProtocolVersion 98
2013-08-28 18:34:02 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #1
2013-08-28 18:34:02 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #1
2013-08-28 18:34:02 DEBUG SortedRanges:347 - currentIndex 0 0:0
2013-08-28 18:34:02 DEBUG Counters:177 - Creating group org.apache.hadoop.mapred.Task$Counter with bundle
2013-08-28 18:34:02 DEBUG Counters:314 - Adding SPILLED_RECORDS
2013-08-28 18:34:02 DEBUG Counters:177 - Creating group org.apache.hadoop.mapred.Task$Counter with bundle
2013-08-28 18:34:02 DEBUG SortedRanges:347 - currentIndex 0 0:0
2013-08-28 18:34:02 DEBUG SortedRanges:347 - currentIndex 1 0:0
2013-08-28 18:34:02 DEBUG RPC:230 - Call: getTask 208
2013-08-28 18:34:03 DEBUG TaskRunner:653 - mapred.local.dir for child : /var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/attempt_201308281800_0008_m_000000_0
2013-08-28 18:34:03 DEBUG NativeCodeLoader:40 - Trying to load the custom-built native-hadoop library...
2013-08-28 18:34:03 DEBUG NativeCodeLoader:47 - Failed to load native-hadoop with error: java.lang.UnsatisfiedLinkError: no hadoop in java.library.path
2013-08-28 18:34:03 DEBUG NativeCodeLoader:48 - java.library.path=/usr/java/jdk1.6.0_45/jre/lib/amd64/server:/usr/java/jdk1.6.0_45/jre/lib/amd64:/usr/java/jdk1.6.0_45/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib:/var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/work
2013-08-28 18:34:03 WARN NativeCodeLoader:52 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2013-08-28 18:34:03 DEBUG TaskRunner:709 - Fully deleting contents of /var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/work
2013-08-28 18:34:03 INFO JvmMetrics:71 - Initializing JVM Metrics with processName=MAP, sessionId=
2013-08-28 18:34:03 DEBUG Child:251 - Creating remote user to execute task: root
2013-08-28 18:34:03 DEBUG UserGroupInformation:1300 - PriviledgedAction as:root (auth:SIMPLE) from:org.apache.hadoop.mapred.Child.main(Child.java:260)
2013-08-28 18:34:03 DEBUG FileSystem:1598 - Creating filesystem for hdfs://localhost:8020
2013-08-28 18:34:04 DEBUG Client:256 - The ping interval is60000ms.
2013-08-28 18:34:04 DEBUG Client:299 - Use SIMPLE authentication for protocol ClientProtocol
2013-08-28 18:34:04 DEBUG Client:569 - Connecting to localhost/127.0.0.1:8020
2013-08-28 18:34:04 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #2
2013-08-28 18:34:04 DEBUG Client:762 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root: starting, having connections 2
2013-08-28 18:34:04 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #2
2013-08-28 18:34:04 DEBUG RPC:230 - Call: getProtocolVersion 18
2013-08-28 18:34:04 DEBUG DFSClient:274 - Short circuit read is false
2013-08-28 18:34:04 DEBUG DFSClient:280 - Connect to datanode via hostname is false
2013-08-28 18:34:04 DEBUG Task:516 - using new api for output committer
2013-08-28 18:34:04 INFO ProcessTree:65 - setsid exited with exit code 0
2013-08-28 18:34:04 INFO Task:539 - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@79ee2c2c
2013-08-28 18:34:04 DEBUG ProcfsBasedProcessTree:238 - [ 16890 ]
2013-08-28 18:34:04 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #3
2013-08-28 18:34:04 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #3
2013-08-28 18:34:04 DEBUG RPC:230 - Call: getBlockLocations 12
2013-08-28 18:34:04 DEBUG DFSClient:2595 - Connecting to /127.0.0.1:50010
2013-08-28 18:34:04 DEBUG FSInputChecker:1653 - DFSClient readChunk got seqno 0 offsetInBlock 0 lastPacketInBlock false packetLen 520
2013-08-28 18:34:04 DEBUG Counters:314 - Adding SPLIT_RAW_BYTES
2013-08-28 18:34:04 DEBUG DFSClient:2529 - Client couldn't reuse - didnt send code
2013-08-28 18:34:04 INFO MapTask:613 - Processing split: hdfs://localhost:8020/usr/hadoop/sample/2012MTCReportFINAL.pdf:0+1419623
2013-08-28 18:34:04 DEBUG Counters:314 - Adding MAP_INPUT_RECORDS
2013-08-28 18:34:04 DEBUG FileSystem:1598 - Creating filesystem for file:///
2013-08-28 18:34:04 INFO MapTask:803 - io.sort.mb = 100
2013-08-28 18:34:05 INFO MapTask:815 - data buffer = 79691776/99614720
2013-08-28 18:34:05 INFO MapTask:816 - record buffer = 262144/327680
2013-08-28 18:34:05 DEBUG Counters:314 - Adding MAP_OUTPUT_BYTES
2013-08-28 18:34:05 DEBUG Counters:314 - Adding MAP_OUTPUT_RECORDS
2013-08-28 18:34:05 DEBUG Counters:314 - Adding COMBINE_INPUT_RECORDS
2013-08-28 18:34:05 DEBUG Counters:314 - Adding COMBINE_OUTPUT_RECORDS
2013-08-28 18:34:05 WARN LoadSnappy:46 - Snappy native library not loaded
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #4
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #4
2013-08-28 18:34:05 DEBUG RPC:230 - Call: getBlockLocations 4
2013-08-28 18:34:05 INFO FileToRecordMapper:65 - Inside run method.
2013-08-28 18:34:05 INFO MapTask:1142 - Starting flush of map output
2013-08-28 18:34:05 INFO Task:830 - Task:attempt_201308281800_0008_m_000000_0 is done. And is in the process of commiting
2013-08-28 18:34:05 DEBUG Counters:177 - Creating group FileSystemCounters with nothing
2013-08-28 18:34:05 DEBUG Counters:314 - Adding FILE_BYTES_WRITTEN
2013-08-28 18:34:05 DEBUG Counters:314 - Adding HDFS_BYTES_READ
2013-08-28 18:34:05 DEBUG Counters:314 - Adding COMMITTED_HEAP_BYTES
2013-08-28 18:34:05 DEBUG ProcfsBasedProcessTree:238 - [ 16890 ]
2013-08-28 18:34:05 DEBUG Counters:314 - Adding CPU_MILLISECONDS
2013-08-28 18:34:05 DEBUG Counters:314 - Adding PHYSICAL_MEMORY_BYTES
2013-08-28 18:34:05 DEBUG Counters:314 - Adding VIRTUAL_MEMORY_BYTES
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #5
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #5
2013-08-28 18:34:05 DEBUG RPC:230 - Call: getFileInfo 2
2013-08-28 18:34:05 DEBUG Task:658 - attempt_201308281800_0008_m_000000_0 Progress/ping thread exiting since it got interrupted
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #6
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #6
2013-08-28 18:34:05 DEBUG RPC:230 - Call: statusUpdate 3
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #7
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #7
2013-08-28 18:34:05 DEBUG RPC:230 - Call: done 1
2013-08-28 18:34:05 INFO Task:942 - Task 'attempt_201308281800_0008_m_000000_0' done.
2013-08-28 18:34:05 INFO TaskLogsTruncater:69 - Initializing logs' truncater with mapRetainSize=-1 and reduceRetainSize=-1
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:174 - Truncation is not needed for /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/stdout
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:174 - Truncation is not needed for /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/stderr
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:202 - Cannot open /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/syslog for reading. Continuing with other log files
java.io.FileNotFoundException: /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/syslog (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:120)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:199)
at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:202 - Cannot open /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/profile.out for reading. Continuing with other log files
java.io.FileNotFoundException: /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/profile.out (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:120)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:199)
at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:202 - Cannot open /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/debugout for reading. Continuing with other log files
java.io.FileNotFoundException: /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/debugout (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:120)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:199)
at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
I have checked the permissions, and they are fine for the sample WordCount program. I am new to Hadoop; I googled around but could not find anything substantial. I am using hadoop-0.20.2-cdh3u6 on a single-node setup.