在hadoop中定义数据类型

时间:2015-03-27 16:30:07

标签: java hadoop

我用Java编写Hadoop程序,我想在Java程序中定义自己的数据类型。这是reference

这是我的代码:

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.HashMap;

import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput; 
import java.io.DataOutput; 
import java.io.IOException; 
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Co {

    public class Middle implements WritableComparable {

        public int number;
        public String page;

        Middle() {
            number = -1;
            page = "";
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(number);
            out.writeUTF(page);
        }

        public void readFields(DataInput in) throws IOException {
            number = in.readInt();
            page = in.readUTF();
        }

        public int compareTo(Middle o) {
            int thisValue = this.value;
            int thatValue = o.value;
            return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
        }

    }

    public static class TokenizerMapper extends Mapper<Object, Text, Text, Middle> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            Middle temp = new Middle();
            temp.page = "1";
            temp.number = 1;
            context.write(new Text("A"), temp);
        }
    }

    public static class IntSumReducer extends Reducer<Text, Middle, Text, DoubleWritable> {

        public void reduce(Text key, Iterable<Middle> values, Context context) throws IOException, InterruptedException {

            context.write(new Text("A"), new DoubleWritable(0.0));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(Co.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(Mycombiner.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Middle.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

但是,我只是照搬了reference中的代码,编译程序时却遇到了问题,错误信息如下:

The information is here

那么,有人可以告诉我该怎么办?谢谢!

1 个答案:

答案 0 :(得分:1)

下面按错误在终端中出现的顺序依次说明:

WritableComparable需要指定一个类型。在类声明中尝试以下更改,看看它是否有效。

public class Middle implements WritableComparable<Middle> {

如果这不起作用,请尝试让compareTo方法接受Object作为参数,而不是Middle


第一个找不到符号:这是因为对象中没有名为value的字段。我怀疑这应该是this.number而不是this.value

第二个“找不到符号”错误也是同样的原因(o.value应改为o.number)

最后的错误是由于您的Middle类被定义为非静态嵌套类(内部类)。内部类只能在外部类的实例中被实例化。但是,您从未实例化Co类,因此无法实例化Middle类。尝试将Middle移到另一个类中,或者在main方法中实例化Co,并将其余代码放入非静态的run(String[] args)方法中。有关更多信息,请参阅Oracle的explanation of nested classes

如果您想要一个更好的WritableComparable类示例,请检查here