在另一个Java程序中运行mapreduce类

时间:2013-12-06 07:17:26

标签: java hadoop mapreduce

我编写了一个MapReduce类,并把它打包成了一个jar文件。现在我想在另一个Java程序中使用这个jar。谁能告诉我应该怎么做?谢谢。

这是我的MapReduce程序:

package org.apache.cassandra.com;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
import org.apache.cassandra.hadoop.cql3.CqlPagingInputFormat;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class CassandraSumLib extends Configured implements Tool
{
    public CassandraSumLib(){

    }

    static final String KEYSPACE = "weather";
    static final String COLUMN_FAMILY = "momentinfo1";
    static final String OUTPUT_PATH = "/tmp/OutPut";

    private static final Logger logger = LoggerFactory.getLogger(CassandraSum.class);

    public int CassandraSum(String[] args) throws Exception
    {

        return ToolRunner.run(new Configuration(), new CassandraSumLib(), args);

       }

    ///////////////////////////////////////////////////////////

    public static class Summap extends Mapper<Map<String, ByteBuffer>, Map<FloatWritable, ByteBuffer>, Text, DoubleWritable>
      {
        Text word = new Text("SUM");
        float temp;
        public void map(Map<String, ByteBuffer> keys, Map<FloatWritable, ByteBuffer> columns, Context context) throws IOException, InterruptedException
        {
            for (Entry<FloatWritable, ByteBuffer> column : columns.entrySet())
            {
                if (!"column".equals(column.getKey()))
                    continue;
                temp = ByteBufferUtil.toFloat(column.getValue());
                //System.out.println(temp);
                context.write(word, new DoubleWritable(temp));
                        //System.out.println(word + "    " + temp);
            }
        }
    }

///////////////////////////////////////////////////////////

    public static class Sumred extends Reducer<Text, DoubleWritable, Text, DoubleWritable>
    {
        public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException
        {

             Double sum = 0.0;
             for (DoubleWritable val : values){
            //  System.out.println(val.get());
               sum += val.get();}

             context.write(key, new DoubleWritable(sum));
         }
    }

///////////////////////////////////////////////////////////

    public int run(String[] args) throws Exception
    {

         Job job = new Job(getConf(), "SUM");
         job.setJarByClass(CassandraSum.class);
         job.setMapperClass(Summap.class);


         JobConf conf = new JobConf( getConf(), CassandraSum.class);
     //    conf.setNumMapTasks(1000);
       //  conf.setNumReduceTasks(900);

         job.setOutputFormatClass(TextOutputFormat.class);
         job.setCombinerClass(Sumred.class);
         job.setReducerClass(Sumred.class);
         job.setOutputKeyClass(Text.class);
         job.setNumReduceTasks(900);
         job.setOutputValueClass(DoubleWritable.class);
         FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));


         job.setInputFormatClass(CqlPagingInputFormat.class);

         ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
         ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
         ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
         ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

         CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), "3");
         job.waitForCompletion(true);

         return 0;
    }
  }

我想从另一个程序中调用此类。这是我的第二个程序,它调用我的第一个程序:

package org.apache.cassandra.com;

import java.util;
import org.apache.hadoop.util.RunJar;
import org.apache.cassandra.com.CassandraSumLib;


// Driver program from the question: instantiates CassandraSumLib from the jar.
public class CassandraSum {

    public static void main(String[] args) throws Exception{
        CassandraSumLib CSL = new CassandraSumLib();
        // Placeholder left by the asker (not valid Java): which method to call on CSL is the open question.
        CSL.??? (which method should I write here?)
    }
}

感谢

2 个答案:

答案 0 :(得分:1)

在eclipse中添加jar文件的步骤

1. Right-click on the project
2. Click Build Path -> Configure Build Path
3. Select Java Build Path
4. Click on the Libraries tab
5. Click Add External JARs
6. Choose the jar file
7. Click OK

答案 1 :(得分:0)

将jar添加到第二个程序的类路径中。如果要从命令行编译/运行,请使用-cp选项。