Hadoop MapReduce WordCount:String 转 Text 或 Text 转 String

时间:2014-05-02 00:53:52

标签: hadoop

SELECT COUNT(*) FROM F WHERE A = 'Lee'

我想使用wordcount示例将此查询转换为代码。

/**
 * Configures and submits the "word count" MapReduce job, wiring
 * {@code WordCountMapper} and {@code WordCountReducer} together.
 */
public class WordCountDriver {
    /**
     * Builds the job, runs it to completion and exits with a status that
     * reflects the outcome (0 on success, 1 on job failure, 2 on bad usage).
     *
     * @param args command-line arguments; the input path is read from
     *             {@code args[1]} and the output path from {@code args[2]}
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // args[0] is not read by this driver; the indices are kept as in the
        // original so existing launch commands keep working.
        // NOTE(review): confirm what the launcher passes in args[0].
        if (args.length < 3) {
            System.err.println("Usage: WordCountDriver <unused> <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(WordCountMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[1]));

        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        // Propagate the job result so a failed job does not look like success
        // to the calling shell or scheduler.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);

        public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                context.write(new Text(itr.nextToken()), one);
            }
        }
    }

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context) 
            throws IOException, InterruptedException {
        int sum = 0;

        Text a;
        String convertkey;
        convertkey = "Lee";
        a = new Text(convertkey);

        if( key == a){

        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
        }
    }
}

但是 if 语句不起作用。:( 使用 'if (key == a)' 时不会输出任何结果;如果改成 'if (key != a)',就有输出了。为什么程序会认为 key 不等于 a?

1 个答案:

答案 0 :(得分:1)

不要用 == 来比较对象(== 比较的是引用,而不是内容),请改用 equals() 方法。但请注意,key 是 Text 类型,而不是像 "Lee" 这样的 String。您需要先调用 toString() 方法把 Text 转换成 String,再进行比较。