How do I access a distributed cache file from the mapper and read its contents?

Asked: 2013-05-28 18:59:10

Tags: hadoop

I'm just trying to access the distributed cache file from the mapper, and to emit each record (string) from the cache file as the key, in order to check whether I'm actually reading the contents of the cache file (stop.txt). But the keys I get are the contents of the actual input file (input.txt) instead. Please guide me. Both the cache file and the input file are on HDFS.

Below is my actual code:

package com.cache;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class DistributedCacheTest {

    public static class MyMap extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        public Path[] localArchives;
        public Path[] localFiles;
        BufferedReader cacheReader;

        public void configure(JobConf job) {
            // Retrieve the local (task-node) paths of the cached archives/files
            try {
                localArchives = DistributedCache.getLocalCacheArchives(job);
                localFiles = DistributedCache.getLocalCacheFiles(job);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }


        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter report)
                throws IOException {

            if (localFiles != null && localFiles.length > 0) {
                System.out.println("Inside map(): " + localFiles[0].toString());

                // Read the localized cache file line by line and emit each
                // line as a key, just to verify the cache file's contents
                String line;
                try {
                    cacheReader = new BufferedReader(new FileReader(localFiles[0].toString()));
                    while ((line = cacheReader.readLine()) != null) {
                        System.out.println("**********" + line);
                        output.collect(new Text(line), new IntWritable(1));
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                } finally {
                    // Guard against an NPE if the reader was never opened
                    if (cacheReader != null) {
                        cacheReader.close();
                    }
                }
            }
        }

    }
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException, URISyntaxException {
        if (args.length != 2) {
            System.err.println("Usage: DistributedCacheTest <input path> <output path>");
            System.exit(-1);
        }
        JobConf job = new JobConf(DistributedCacheTest.class);
        job.setJobName("DistriTestjob");
        // Register the HDFS file so it is copied to each task node's local disk
        DistributedCache.addCacheFile(new URI("/user/hadoop/stop.txt"), job);

        job.setMapperClass(MyMap.class);
        job.setReducerClass(IdentityReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job);
    }

}
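
For reference, here is a minimal sketch of the more common pattern: load the cache file once in configure() into an in-memory set, then consult it per record in map(). The class name StopWordMap, the stopWords field, and the stop-word filtering logic are illustrative assumptions, not part of the original code; it also needs java.util.Set and java.util.HashSet in addition to the imports above.

    // Illustrative sketch (assumed names), same old mapred API as above
    public static class StopWordMap extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        // Holds the cache file's lines in memory for the lifetime of the task
        private final Set<String> stopWords = new HashSet<String>();

        public void configure(JobConf job) {
            try {
                // Localized copies of the files registered via addCacheFile()
                Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
                if (localFiles != null && localFiles.length > 0) {
                    BufferedReader reader = new BufferedReader(
                            new FileReader(localFiles[0].toString()));
                    try {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            stopWords.add(line.trim());
                        }
                    } finally {
                        reader.close();
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            // Emit only the input words that are NOT in the cached stop list
            for (String word : value.toString().split("\\s+")) {
                if (!stopWords.contains(word)) {
                    output.collect(new Text(word), new IntWritable(1));
                }
            }
        }
    }

If the keys still look like input.txt content, one common cause worth ruling out is submitting a stale jar, so the cluster runs older code than what you edited.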

0 Answers:

There are no answers yet.