I am writing a mapper class that should read files from an HDFS location and create one record (using a custom class) per file. The mapper class code:
package com.nayan.bigdata.hadoop;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
/**
* @file : FileToRecordMapper.java
* @author : nayan
* @version : 1.0.0
* @date : 27-Aug-2013 12:13:44 PM
* @desc : Mapper class to read files and convert it into records.
*/
public class FileToRecordMapper extends
        Mapper<LongWritable, Text, Text, RecordWritable> {

    private static Logger logger = Logger.getLogger(FileToRecordMapper.class);
    List<Path> allPaths;
    FileSystem fs;

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        logger.info("Inside cleanup method.");
    }

    @Override
    protected void map(LongWritable key, Text value,
            Context context)
            throws IOException, InterruptedException {
        logger.info("Starting map method of FileToRecordMapper class.");
        for (Path path : allPaths) {
            FSDataInputStream in = this.fs.open(path);
            Text filePath = new Text(path.getName());
            Text directoryPath = new Text(path.getParent().getName());
            Text filename = new Text(path.getName().substring(path.getName().lastIndexOf('/') + 1,
                    path.getName().length()));
            byte[] b = new byte[1024];
            StringBuilder contentBuilder = new StringBuilder();
            while ((in.read(b)) > 0) {
                contentBuilder.append(new String(b, "UTF-8"));
            }
            Text fileContent = new Text(contentBuilder.toString());
            in.close();
            RecordWritable record = new RecordWritable(filePath, filename,
                    fileContent, new LongWritable(System.currentTimeMillis()));
            logger.info("Record Created : " + record);
            context.write(directoryPath, record);
            logger.info("map method of FileToRecordMapper class completed.");
        }
    }

    @Override
    public void run(Context context)
            throws IOException, InterruptedException {
        logger.info("Inside run method.");
    }

    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        logger.info("Inside setup method.");
        try {
            logger.info("Starting configure method of FileToRecordMapper class.");
            fs = FileSystem.get(context.getConfiguration());
            Path path = new Path(context.getConfiguration().get("mapred.input.dir"));
            allPaths = getAllPaths(path);
        } catch (IOException e) {
            logger.error("Error while fetching paths.", e);
        }
        logger.info("Paths : " + ((null != allPaths) ? allPaths : "null"));
        logger.info("configure method of FileToRecordMapper class completed.");
        super.setup(context);
    }

    private List<Path> getAllPaths(Path path) throws IOException {
        ArrayList<Path> paths = new ArrayList<Path>();
        getAllPaths(path, paths);
        return paths;
    }

    private void getAllPaths(Path path, List<Path> paths) throws IOException {
        try {
            if (!this.fs.isFile(path)) {
                for (FileStatus s : fs.listStatus(path)) {
                    getAllPaths(s.getPath(), paths);
                }
            } else {
                paths.add(path);
            }
        } catch (IOException e) {
            logger.error("File System Exception.", e);
            throw e;
        }
    }
}
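As far as I understand it, the stock org.apache.hadoop.mapreduce.Mapper drives setup(), map() and cleanup() from its run() method. For reference, the default loop looks roughly like this (paraphrased from my reading of the 0.20.x Mapper source, so treat it as a sketch rather than verbatim):

    // Default Mapper.run() loop in the new API (paraphrased for reference)
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        // feed every (key, value) pair of the split to map()
        while (context.nextKeyValue()) {
            map(context.getCurrentKey(), context.getCurrentValue(), context);
        }
        cleanup(context);
    }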
The record class is:
package com.nayan.bigdata.hadoop;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
/**
* @file : RecordWritable.java
* @author : nayan
* @version : 1.0.0
* @date : 21-Aug-2013 1:53:12 PM
* @desc : Class to create a record in Accumulo
*/
public class RecordWritable implements Writable {

    private Text filePath;
    private Text fileName;
    private Text fileContent;
    private LongWritable timeStamp;

    public RecordWritable() {
        this.filePath = new Text();
        this.fileName = new Text();
        this.fileContent = new Text();
        this.timeStamp = new LongWritable(System.currentTimeMillis());
    }

    /**
     * @param filePath
     * @param fileName
     * @param fileContent
     * @param timeStamp
     */
    public RecordWritable(Text filePath, Text fileName, Text fileContent,
            LongWritable timeStamp) {
        this.filePath = filePath;
        this.fileName = fileName;
        this.fileContent = fileContent;
        this.timeStamp = timeStamp;
    }

    public Text getFilePath() {
        return filePath;
    }

    public void setFilePath(Text filePath) {
        this.filePath = filePath;
    }

    public Text getFileName() {
        return fileName;
    }

    public void setFileName(Text fileName) {
        this.fileName = fileName;
    }

    public Text getFileContent() {
        return fileContent;
    }

    public void setFileContent(Text fileContent) {
        this.fileContent = fileContent;
    }

    public LongWritable getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(LongWritable timeStamp) {
        this.timeStamp = timeStamp;
    }

    @Override
    public int hashCode() {
        return this.filePath.getLength() + this.fileName.getLength() + this.fileContent.getLength();
    }

    @Override
    public boolean equals(Object obj) {
        if (obj instanceof RecordWritable) {
            RecordWritable otherRecord = (RecordWritable) obj;
            return this.filePath.equals(otherRecord.filePath) && this.fileName.equals(otherRecord.fileName);
        }
        return false;
    }

    @Override
    public String toString() {
        StringBuilder recordDesc = new StringBuilder("Record Details ::\t");
        recordDesc.append("File Path + ").append(this.filePath).append("\t");
        recordDesc.append("File Name + ").append(this.fileName).append("\t");
        recordDesc.append("File Content Length + ").append(this.fileContent.getLength()).append("\t");
        recordDesc.append("File TimeStamp + ").append(this.timeStamp).append("\t");
        return recordDesc.toString();
    }

    @Override
    public void readFields(DataInput din) throws IOException {
        filePath.readFields(din);
        fileName.readFields(din);
        fileContent.readFields(din);
        timeStamp.readFields(din);
    }

    @Override
    public void write(DataOutput dout) throws IOException {
        filePath.write(dout);
        fileName.write(dout);
        fileContent.write(dout);
        timeStamp.write(dout);
    }
}
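To rule out serialization problems, I also sketched a small round-trip check for RecordWritable. The class name and field values below are made up for illustration and are not part of the project:

    package com.nayan.bigdata.hadoop;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;

    // Hypothetical round-trip check: write() a record to a byte array,
    // readFields() it back, and compare the printed field values.
    public class RecordWritableRoundTrip {
        public static void main(String[] args) throws IOException {
            RecordWritable original = new RecordWritable(new Text("/usr/hadoop/sample"),
                    new Text("sample.txt"), new Text("file content"), new LongWritable(123L));

            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            original.write(new DataOutputStream(bytes));

            RecordWritable copy = new RecordWritable();
            copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

            System.out.println(original);
            System.out.println(copy); // expected to print the same details
        }
    }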
The Job Runner class is:
package com.nayan.bigdata.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
* @file : HadoopJobRunner.java
* @author : nayan
* @version : 1.0.0
* @date : 22-Aug-2013 12:45:15 PM
* @desc : Class to run Hadoop MR job.
*/
public class HadoopJobRunner extends Configured implements Tool {

    private static Logger logger = Logger.getLogger(HadoopJobRunner.class);

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new HadoopJobRunner(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] arg0) throws Exception {
        logger.info("Initiating Hadoop Job.");
        Configuration conf = new Configuration(true);
        conf.setStrings("mapred.output.dir", arg0[1]);
        conf.setStrings("mapred.input.dir", arg0[0]);

        Job mrJob = new Job(conf, "FileRecordsJob");
        mrJob.setJarByClass(HadoopJobRunner.class);

        mrJob.setMapOutputKeyClass(Text.class);
        mrJob.setMapOutputValueClass(RecordWritable.class);
        mrJob.setMapperClass(FileToRecordMapper.class);

        mrJob.setReducerClass(FileRecordsReducer.class);
        mrJob.setOutputKeyClass(Text.class);
        mrJob.setOutputValueClass(RecordWritable.class);

        logger.info("MapRed Job Configuration : " + mrJob.getConfiguration().toString());
        logger.info("Input Path : " + mrJob.getConfiguration().get("mapred.input.dir"));
        return mrJob.waitForCompletion(true) ? 0 : 1;
    }
}
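For comparison, the examples I have seen usually set the input/output locations through FileInputFormat/FileOutputFormat on the Job rather than writing mapred.input.dir and mapred.output.dir onto the Configuration directly. A sketch of that variant (not the code I actually ran; the class name is mine) would be:

    package com.nayan.bigdata.hadoop;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    // Hypothetical variant of the job driver that wires the paths through
    // FileInputFormat/FileOutputFormat instead of raw Configuration keys.
    public class HadoopJobRunnerAlt {
        public int run(String[] args) throws Exception {
            Job mrJob = new Job(new Configuration(true), "FileRecordsJob");
            mrJob.setJarByClass(HadoopJobRunnerAlt.class);
            mrJob.setMapperClass(FileToRecordMapper.class);
            mrJob.setReducerClass(FileRecordsReducer.class);
            mrJob.setMapOutputKeyClass(Text.class);
            mrJob.setMapOutputValueClass(RecordWritable.class);
            mrJob.setOutputKeyClass(Text.class);
            mrJob.setOutputValueClass(RecordWritable.class);
            FileInputFormat.addInputPath(mrJob, new Path(args[0]));   // input dir
            FileOutputFormat.setOutputPath(mrJob, new Path(args[1])); // output dir
            return mrJob.waitForCompletion(true) ? 0 : 1;
        }
    }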
The project's POM file is:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.nayan.bigdata</groupId>
    <artifactId>BigDataOperations</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>BigDataOperations</name>
    <properties>
        <hadoop.version>0.20.2</hadoop.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.hamcrest</groupId>
            <artifactId>hamcrest-all</artifactId>
            <version>1.3</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-jar-plugin</artifactId>
                    <configuration>
                        <archive>
                            <manifest>
                                <mainClass>com.nayan.bigdata.hadoop.HadoopJobRunner</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
When I run the jar, I get the following output on the console:
[root@koversevm tmp]# hadoop jar BigDataOperations-1.0-SNAPSHOT.jar /usr/hadoop/sample /usr/hadoop/jobout
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: Initiating Hadoop Job.
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: Setting the input/output path.
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: MapRed Job Configuration : Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml
13/08/28 18:33:57 INFO hadoop.HadoopJobRunner: Input Path : null
13/08/28 18:33:58 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/08/28 18:33:58 INFO input.FileInputFormat: Total input paths to process : 8
13/08/28 18:33:58 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
13/08/28 18:33:58 WARN snappy.LoadSnappy: Snappy native library not loaded
13/08/28 18:33:58 INFO mapred.JobClient: Running job: job_201308281800_0008
13/08/28 18:33:59 INFO mapred.JobClient: map 0% reduce 0%
13/08/28 18:34:06 INFO mapred.JobClient: map 25% reduce 0%
13/08/28 18:34:13 INFO mapred.JobClient: map 50% reduce 0%
13/08/28 18:34:17 INFO mapred.JobClient: map 75% reduce 0%
13/08/28 18:34:23 INFO mapred.JobClient: map 100% reduce 0%
13/08/28 18:34:24 INFO mapred.JobClient: map 100% reduce 33%
13/08/28 18:34:26 INFO mapred.JobClient: map 100% reduce 100%
13/08/28 18:34:27 INFO mapred.JobClient: Job complete: job_201308281800_0008
13/08/28 18:34:27 INFO mapred.JobClient: Counters: 25
13/08/28 18:34:27 INFO mapred.JobClient: Job Counters
13/08/28 18:34:27 INFO mapred.JobClient: Launched reduce tasks=1
13/08/28 18:34:27 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=44066
13/08/28 18:34:27 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
13/08/28 18:34:27 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
13/08/28 18:34:27 INFO mapred.JobClient: Launched map tasks=8
13/08/28 18:34:27 INFO mapred.JobClient: Data-local map tasks=8
13/08/28 18:34:27 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=19034
13/08/28 18:34:27 INFO mapred.JobClient: FileSystemCounters
13/08/28 18:34:27 INFO mapred.JobClient: FILE_BYTES_READ=6
13/08/28 18:34:27 INFO mapred.JobClient: HDFS_BYTES_READ=1011
13/08/28 18:34:27 INFO mapred.JobClient: FILE_BYTES_WRITTEN=549207
13/08/28 18:34:27 INFO mapred.JobClient: Map-Reduce Framework
13/08/28 18:34:27 INFO mapred.JobClient: Map input records=0
13/08/28 18:34:27 INFO mapred.JobClient: Reduce shuffle bytes=48
13/08/28 18:34:27 INFO mapred.JobClient: Spilled Records=0
13/08/28 18:34:27 INFO mapred.JobClient: Map output bytes=0
13/08/28 18:34:27 INFO mapred.JobClient: CPU time spent (ms)=3030
13/08/28 18:34:27 INFO mapred.JobClient: Total committed heap usage (bytes)=1473413120
13/08/28 18:34:27 INFO mapred.JobClient: Combine input records=0
13/08/28 18:34:27 INFO mapred.JobClient: SPLIT_RAW_BYTES=1011
13/08/28 18:34:27 INFO mapred.JobClient: Reduce input records=0
13/08/28 18:34:27 INFO mapred.JobClient: Reduce input groups=0
13/08/28 18:34:27 INFO mapred.JobClient: Combine output records=0
13/08/28 18:34:27 INFO mapred.JobClient: Physical memory (bytes) snapshot=1607675904
13/08/28 18:34:27 INFO mapred.JobClient: Reduce output records=0
13/08/28 18:34:27 INFO mapred.JobClient: Virtual memory (bytes) snapshot=23948111872
13/08/28 18:34:27 INFO mapred.JobClient: Map output records=0
But when I look at the task logs, I find the following exceptions:
Task Logs: 'attempt_201308281800_0008_m_000000_0'
stdout logs
2013-08-28 18:34:01 DEBUG Child:82 - Child starting
2013-08-28 18:34:02 DEBUG Groups:136 - Creating new Groups object
2013-08-28 18:34:02 DEBUG Groups:59 - Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping; cacheTimeout=300000
2013-08-28 18:34:02 DEBUG UserGroupInformation:193 - hadoop login
2013-08-28 18:34:02 DEBUG UserGroupInformation:142 - hadoop login commit
2013-08-28 18:34:02 DEBUG UserGroupInformation:172 - using local user:UnixPrincipal: mapred
2013-08-28 18:34:02 DEBUG UserGroupInformation:664 - UGI loginUser:mapred (auth:SIMPLE)
2013-08-28 18:34:02 DEBUG FileSystem:1598 - Creating filesystem for file:///var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/jobToken
2013-08-28 18:34:02 DEBUG TokenCache:182 - Task: Loaded jobTokenFile from: /var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/jobToken; num of sec keys = 0 Number of tokens 1
2013-08-28 18:34:02 DEBUG Child:106 - loading token. # keys =0; from file=/var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/jobToken
2013-08-28 18:34:02 DEBUG UserGroupInformation:1300 - PriviledgedAction as:job_201308281800_0008 (auth:SIMPLE) from:org.apache.hadoop.mapred.Child.main(Child.java:121)
2013-08-28 18:34:02 DEBUG Client:256 - The ping interval is60000ms.
2013-08-28 18:34:02 DEBUG Client:299 - Use SIMPLE authentication for protocol TaskUmbilicalProtocol
2013-08-28 18:34:02 DEBUG Client:569 - Connecting to /127.0.0.1:50925
2013-08-28 18:34:02 DEBUG Client:762 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008: starting, having connections 1
2013-08-28 18:34:02 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #0
2013-08-28 18:34:02 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #0
2013-08-28 18:34:02 DEBUG RPC:230 - Call: getProtocolVersion 98
2013-08-28 18:34:02 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #1
2013-08-28 18:34:02 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #1
2013-08-28 18:34:02 DEBUG SortedRanges:347 - currentIndex 0 0:0
2013-08-28 18:34:02 DEBUG Counters:177 - Creating group org.apache.hadoop.mapred.Task$Counter with bundle
2013-08-28 18:34:02 DEBUG Counters:314 - Adding SPILLED_RECORDS
2013-08-28 18:34:02 DEBUG Counters:177 - Creating group org.apache.hadoop.mapred.Task$Counter with bundle
2013-08-28 18:34:02 DEBUG SortedRanges:347 - currentIndex 0 0:0
2013-08-28 18:34:02 DEBUG SortedRanges:347 - currentIndex 1 0:0
2013-08-28 18:34:02 DEBUG RPC:230 - Call: getTask 208
2013-08-28 18:34:03 DEBUG TaskRunner:653 - mapred.local.dir for child : /var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/attempt_201308281800_0008_m_000000_0
2013-08-28 18:34:03 DEBUG NativeCodeLoader:40 - Trying to load the custom-built native-hadoop library...
2013-08-28 18:34:03 DEBUG NativeCodeLoader:47 - Failed to load native-hadoop with error: java.lang.UnsatisfiedLinkError: no hadoop in java.library.path
2013-08-28 18:34:03 DEBUG NativeCodeLoader:48 - java.library.path=/usr/java/jdk1.6.0_45/jre/lib/amd64/server:/usr/java/jdk1.6.0_45/jre/lib/amd64:/usr/java/jdk1.6.0_45/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib:/var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/work
2013-08-28 18:34:03 WARN NativeCodeLoader:52 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2013-08-28 18:34:03 DEBUG TaskRunner:709 - Fully deleting contents of /var/lib/hadoop-0.20/cache/mapred/mapred/local/taskTracker/root/jobcache/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/work
2013-08-28 18:34:03 INFO JvmMetrics:71 - Initializing JVM Metrics with processName=MAP, sessionId=
2013-08-28 18:34:03 DEBUG Child:251 - Creating remote user to execute task: root
2013-08-28 18:34:03 DEBUG UserGroupInformation:1300 - PriviledgedAction as:root (auth:SIMPLE) from:org.apache.hadoop.mapred.Child.main(Child.java:260)
2013-08-28 18:34:03 DEBUG FileSystem:1598 - Creating filesystem for hdfs://localhost:8020
2013-08-28 18:34:04 DEBUG Client:256 - The ping interval is60000ms.
2013-08-28 18:34:04 DEBUG Client:299 - Use SIMPLE authentication for protocol ClientProtocol
2013-08-28 18:34:04 DEBUG Client:569 - Connecting to localhost/127.0.0.1:8020
2013-08-28 18:34:04 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #2
2013-08-28 18:34:04 DEBUG Client:762 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root: starting, having connections 2
2013-08-28 18:34:04 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #2
2013-08-28 18:34:04 DEBUG RPC:230 - Call: getProtocolVersion 18
2013-08-28 18:34:04 DEBUG DFSClient:274 - Short circuit read is false
2013-08-28 18:34:04 DEBUG DFSClient:280 - Connect to datanode via hostname is false
2013-08-28 18:34:04 DEBUG Task:516 - using new api for output committer
2013-08-28 18:34:04 INFO ProcessTree:65 - setsid exited with exit code 0
2013-08-28 18:34:04 INFO Task:539 - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@79ee2c2c
2013-08-28 18:34:04 DEBUG ProcfsBasedProcessTree:238 - [ 16890 ]
2013-08-28 18:34:04 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #3
2013-08-28 18:34:04 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #3
2013-08-28 18:34:04 DEBUG RPC:230 - Call: getBlockLocations 12
2013-08-28 18:34:04 DEBUG DFSClient:2595 - Connecting to /127.0.0.1:50010
2013-08-28 18:34:04 DEBUG FSInputChecker:1653 - DFSClient readChunk got seqno 0 offsetInBlock 0 lastPacketInBlock false packetLen 520
2013-08-28 18:34:04 DEBUG Counters:314 - Adding SPLIT_RAW_BYTES
2013-08-28 18:34:04 DEBUG DFSClient:2529 - Client couldn't reuse - didnt send code
2013-08-28 18:34:04 INFO MapTask:613 - Processing split: hdfs://localhost:8020/usr/hadoop/sample/2012MTCReportFINAL.pdf:0+1419623
2013-08-28 18:34:04 DEBUG Counters:314 - Adding MAP_INPUT_RECORDS
2013-08-28 18:34:04 DEBUG FileSystem:1598 - Creating filesystem for file:///
2013-08-28 18:34:04 INFO MapTask:803 - io.sort.mb = 100
2013-08-28 18:34:05 INFO MapTask:815 - data buffer = 79691776/99614720
2013-08-28 18:34:05 INFO MapTask:816 - record buffer = 262144/327680
2013-08-28 18:34:05 DEBUG Counters:314 - Adding MAP_OUTPUT_BYTES
2013-08-28 18:34:05 DEBUG Counters:314 - Adding MAP_OUTPUT_RECORDS
2013-08-28 18:34:05 DEBUG Counters:314 - Adding COMBINE_INPUT_RECORDS
2013-08-28 18:34:05 DEBUG Counters:314 - Adding COMBINE_OUTPUT_RECORDS
2013-08-28 18:34:05 WARN LoadSnappy:46 - Snappy native library not loaded
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #4
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #4
2013-08-28 18:34:05 DEBUG RPC:230 - Call: getBlockLocations 4
2013-08-28 18:34:05 INFO FileToRecordMapper:65 - Inside run method.
2013-08-28 18:34:05 INFO MapTask:1142 - Starting flush of map output
2013-08-28 18:34:05 INFO Task:830 - Task:attempt_201308281800_0008_m_000000_0 is done. And is in the process of commiting
2013-08-28 18:34:05 DEBUG Counters:177 - Creating group FileSystemCounters with nothing
2013-08-28 18:34:05 DEBUG Counters:314 - Adding FILE_BYTES_WRITTEN
2013-08-28 18:34:05 DEBUG Counters:314 - Adding HDFS_BYTES_READ
2013-08-28 18:34:05 DEBUG Counters:314 - Adding COMMITTED_HEAP_BYTES
2013-08-28 18:34:05 DEBUG ProcfsBasedProcessTree:238 - [ 16890 ]
2013-08-28 18:34:05 DEBUG Counters:314 - Adding CPU_MILLISECONDS
2013-08-28 18:34:05 DEBUG Counters:314 - Adding PHYSICAL_MEMORY_BYTES
2013-08-28 18:34:05 DEBUG Counters:314 - Adding VIRTUAL_MEMORY_BYTES
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root sending #5
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to localhost/127.0.0.1:8020 from root got value #5
2013-08-28 18:34:05 DEBUG RPC:230 - Call: getFileInfo 2
2013-08-28 18:34:05 DEBUG Task:658 - attempt_201308281800_0008_m_000000_0 Progress/ping thread exiting since it got interrupted
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #6
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #6
2013-08-28 18:34:05 DEBUG RPC:230 - Call: statusUpdate 3
2013-08-28 18:34:05 DEBUG Client:808 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 sending #7
2013-08-28 18:34:05 DEBUG Client:861 - IPC Client (47) connection to /127.0.0.1:50925 from job_201308281800_0008 got value #7
2013-08-28 18:34:05 DEBUG RPC:230 - Call: done 1
2013-08-28 18:34:05 INFO Task:942 - Task 'attempt_201308281800_0008_m_000000_0' done.
2013-08-28 18:34:05 INFO TaskLogsTruncater:69 - Initializing logs' truncater with mapRetainSize=-1 and reduceRetainSize=-1
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:174 - Truncation is not needed for /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/stdout
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:174 - Truncation is not needed for /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/stderr
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:202 - Cannot open /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/syslog for reading. Continuing with other log files
java.io.FileNotFoundException: /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/syslog (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:120)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:199)
at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:202 - Cannot open /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/profile.out for reading. Continuing with other log files
java.io.FileNotFoundException: /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/profile.out (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:120)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:199)
at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
2013-08-28 18:34:05 DEBUG TaskLogsTruncater:202 - Cannot open /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/debugout for reading. Continuing with other log files
java.io.FileNotFoundException: /usr/lib/hadoop-0.20/logs/userlogs/job_201308281800_0008/attempt_201308281800_0008_m_000000_0/debugout (No such file or directory)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:120)
at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:199)
at org.apache.hadoop.mapred.Child$4.run(Child.java:271)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
I have checked the permissions, and they are fine for the sample WordCount program. I am new to Hadoop; I googled around but could not find anything substantial. I am using hadoop-0.20.2-cdh3u6 on a single-node setup.