在Java中使用ClearNLP库进行语义角色标注

时间:2015-03-19 08:49:16

标签: java nlp

我想使用ClearNLP库进行语义角色标注。我把modelType设置为“general-en”，输入来自一个文件，输出应该会被写入另一个文件。但是程序运行时抛出了NullPointerException，请帮帮我。这是代码：

/**
 * Demo that runs a chain of ClearNLP components over a hard-coded sentence and
 * over the contents of an input file, printing each resulting tree via
 * {@code DEPTree.toStringSRL()}.
 *
 * <p>The components are requested from {@code NLPGetter} for the modes
 * {@code MODE_POS}, {@code MODE_DEP}, {@code MODE_PRED}, {@code MODE_ROLE} and
 * {@code MODE_SRL}, and are applied to every tree in that order.
 */
public class DemoNLPDecode{
  /** Language constant handed to every {@code NLPGetter} lookup. */
  final String language = AbstractReader.LANG_EN;

  /**
   * Loads the tokenizer and the components, then processes one sample sentence
   * (printed to standard output) followed by {@code inputFile} (written to
   * {@code outputFile}).
   *
   * <p>NOTE(review): the stack traces reported with this code fail inside
   * {@code NLPGetter.getTokenizer} and {@code NLPGetter.getComponent} while
   * wrapping a null stream. Presumably the ClearNLP dictionary and model
   * resources are not on the classpath -- confirm against the ClearNLP
   * installation notes.
   *
   * @param modelType  model name passed to {@code NLPGetter.getComponent}
   * @param inputFile  path of the text file to read
   * @param outputFile path of the file the results are written to
   * @throws Exception if loading a component or opening a file fails
   */
  public DemoNLPDecode(String modelType, String inputFile, String outputFile) throws Exception{
    AbstractTokenizer tokenizer = NLPGetter.getTokenizer(language);
    AbstractComponent tagger = NLPGetter.getComponent(modelType, language, NLPMode.MODE_POS);
    AbstractComponent parser = NLPGetter.getComponent(modelType, language, NLPMode.MODE_DEP);
    AbstractComponent identifier = NLPGetter.getComponent(modelType, language, NLPMode.MODE_PRED);
    AbstractComponent classifier = NLPGetter.getComponent(modelType, language, NLPMode.MODE_ROLE);
    AbstractComponent labeler = NLPGetter.getComponent(modelType, language, NLPMode.MODE_SRL);

    // The array order is the order in which the components are applied to each tree.
    AbstractComponent [] components = {tagger, parser, identifier, classifier, labeler};

    String sentence = "I'd like to meet Dr. Choi.";
    process(tokenizer, components, sentence);
    process(tokenizer, components, UTInput.createBufferedFileReader(inputFile), UTOutput.createPrintBufferedFileStream(outputFile));
  }

  /**
   * Tokenizes a single sentence, runs every component over the resulting tree
   * and prints {@code tree.toStringSRL()} to standard output.
   *
   * @param tokenizer  tokenizer used to split {@code sentence} into tokens
   * @param components components applied to the tree, in array order
   * @param sentence   raw sentence text
   */
  public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, String sentence){
    DEPTree tree = NLPGetter.toDEPTree(tokenizer.getTokens(sentence));
    for (AbstractComponent component : components){
      component.process(tree);
    }
    System.out.println(tree.toStringSRL()+"\n");
  }

  /**
   * Segments the text from {@code reader} into sentences, runs every component
   * over each sentence's tree and writes {@code tree.toStringSRL()} to
   * {@code fout}.
   *
   * <p>Both streams are closed before this method returns, including when
   * segmentation or a component throws.
   *
   * @param tokenizer  tokenizer handed to the segmenter
   * @param components components applied to each tree, in array order
   * @param reader     source text; closed by this method
   * @param fout       destination stream; closed by this method
   */
  public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, BufferedReader reader, PrintStream fout){
    try{
      AbstractSegmenter segmenter = NLPGetter.getSegmenter(language, tokenizer);

      for (List<String> tokens : segmenter.getSentences(reader)){
        DEPTree tree = NLPGetter.toDEPTree(tokens);
        for (AbstractComponent component : components){
          component.process(tree);
        }
        fout.println(tree.toStringSRL()+"\n");
      }
    }
    finally{
      // Release both streams even on failure; previously fout was only closed
      // on the success path and the reader was never closed.
      fout.close();
      try{
        reader.close();
      }
      catch (java.io.IOException e){
        // Fully qualified so the file's import block needs no change. A failed
        // close is reported but must not mask an exception from the loop above.
        e.printStackTrace();
      }
    }
  }

  /**
   * Runs the demo with hard-coded model and file paths, printing the stack
   * trace of any failure.
   *
   * @param args ignored
   */
  public static void main(String[] args){
    String modelType  = "general-en";   // "general-en" or "medical-en"
    String inputFile  = "E:/References/Test Files/46.txt";
    String outputFile = "E:/References/Test Files/46_1.txt";

    try{
      new DemoNLPDecode(modelType, inputFile, outputFile);
    }
    catch (Exception e) {e.printStackTrace();}
  }
}

我得到的错误是

java.lang.NullPointerException
at java.io.Reader.<init>(Reader.java:78)
at java.io.InputStreamReader.<init>(InputStreamReader.java:72)
at com.clearnlp.util.UTInput.getStringSet(UTInput.java:101)
at com.clearnlp.tokenization.EnglishTokenizer.initDictionaries(EnglishTokenizer.java:305)
at com.clearnlp.tokenization.EnglishTokenizer.<init>(EnglishTokenizer.java:130)
at com.clearnlp.nlp.NLPGetter.getTokenizer(NLPGetter.java:106)
at DemoNLPDecode.<init>(DemoNLPDecode.java:25)
at DemoNLPDecode.main(DemoNLPDecode.java:75)

java.lang.NullPointerException
at java.util.zip.InflaterInputStream.<init>(InflaterInputStream.java:83)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:77)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:91)
at com.clearnlp.nlp.NLPGetter.getObjectInputStream(NLPGetter.java:176)
at com.clearnlp.nlp.NLPGetter.getComponent(NLPGetter.java:147)
at DemoNLPDecode.<init>(DemoNLPDecode.java:26)
at DemoNLPDecode.main(DemoNLPDecode.java:75)

0 个答案:

没有答案