Question

我在 Hadoop 代码中使用了 Stanford POS 标记器（MaxentTagger）。在代码中加入 POS 标记器之后，每次在 Hadoop 中运行都会反复出现 ClassNotFoundException 错误。我已将标记器模型文件移至 HDFS，并将 Stanford 的 jar 文件作为外部 jar 导入项目。我使用 Eclipse 的导出（Export）选项来创建 jar 文件。这是我的代码：

import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
//import org.apache.hadoop.io.LongWritable;
//import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import edu.stanford.nlp.tagger.maxent.MaxentTagger;




Public class MainClass {


static MaxentTagger tagger=null;

    public static class Smap
     extends Mapper<Object, Text, Text, IntWritable>{

  private final static IntWritable one = new IntWritable(1);
  private Text word = new Text();
  private String line=" ";
  public Configuration conf;

    @Override
    protected void setup(Context context) throws IOException
    {
        conf=context.getConfiguration();
        if(context.getCacheFiles()!=null && context.getCacheFiles().length>0)
        {
            URI[] patternURIs=Job.getInstance(conf).getCacheFiles();
            Path path1=new Path(patternURIs[0].getPath());
            String filename=path1.getName().toString();
            try
            {
                 tagger=new MaxentTagger(filename);
            }
            catch(Exception e)
            {
                e.printStackTrace();
            }
        }
    }




  public void map(Object key, Text value, Context context
                  ) throws IOException, InterruptedException {

    String text = new String(value.toString());
    StringTokenizer words=new StringTokenizer(text," .,?!;:''(){}[]");
    while (words.hasMoreTokens()) {
        String abc=words.nextToken();

        line=line+" "+word;


    }
    MaxentTagger tagger =  new MaxentTagger("taggers/english-left3words-distsim.tagger");
    String s2 = tagger.tagString(line);
    word.set(s2);
    context.write(word, one);
    line=" ";
  }
}

public static class Sreduce
     extends Reducer<Text,IntWritable,Text,IntWritable> {

  private final static IntWritable one = new IntWritable(1);
  public void reduce(Text key, Iterable<Text> values,
                     Context context
                     ) throws IOException, InterruptedException {

    context.write(key, one);
  }
}
     public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "MainClass");

            job.setJarByClass(MainClass.class);
            job.addCacheFile(new URI("/senti/left3words-wsj-0-18.tagger"));
            job.setMapperClass(Smap.class);
            //job.setCombinerClass(Sreduce.class);
            job.setReducerClass(Sreduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));


            job.waitForCompletion(true);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
          }
        }

请有人帮我解决这个问题！！！！！

如何修复“Hadoop 中 MaxentTagger 类的 ClassNotFoundException”

0 个答案: