在Solr

时间:2018-04-11 06:02:26

标签: solr opennlp query-parser

我尝试创建一个CustomQueryParser,我也在使用OpenNLP库。

我的目标是,如果我有一个问题“有多少有缺陷的轮辋导致中国ABC轮胎失效”

我希望最终的查询类似于“有缺陷的轮辋失效轮胎中国” 然后将进入分析仪进行进一步处理。

这是我的QueryParserPlugin代码 -

package com.mycompany.lucene.search;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import com.mycompany.lucene.search.QueryParser;

/**
 * Solr plugin entry point that hands every incoming query string to the
 * custom {@link QueryParser}.
 */
public class QueryParserPlugin extends QParserPlugin {

  /** Field name passed to the parser as its default-field argument. */
  private static final String DEFAULT_FIELD_NAME = "body_txt_str";

  /**
   * Builds a new {@link QueryParser} for a single request.
   *
   * @param qstr        the raw query string
   * @param localParams the local parameters attached to the query
   * @param params      the request-level parameters
   * @param req         the request being served
   * @return a parser bound to the given query and request
   */
  @Override
  public QParser createParser(
      String qstr,
      SolrParams localParams,
      SolrParams params,
      SolrQueryRequest req) {
    QParser parser = new QueryParser(qstr, localParams, params, req, DEFAULT_FIELD_NAME);
    return parser;
  }
}

我的QueryParser的代码 -

package com.mycompany.lucene.search;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

/**
 * Query parser that reduces a natural-language question to its adjectives
 * and nouns using OpenNLP and wraps the result in a single {@link TermQuery}.
 *
 * <p>The query string is tokenized with an OpenNLP tokenizer model, every token
 * is tagged with an OpenNLP part-of-speech model, and only tokens tagged
 * {@code JJ}, {@code NNS} or {@code NN} are kept. The kept tokens are joined
 * with single spaces and used as the text of one term on the target field.
 *
 * <p>NOTE(review): both models are re-read from disk on every {@link #parse()}
 * call; consider caching the loaded models if this shows up in profiling.
 */
public class QueryParser extends QParser {

  /** Path of the OpenNLP tokenizer model file. */
  private static final String TOKEN_MODEL_PATH = "/Files/en-token.bin";

  /** Path of the OpenNLP part-of-speech model file. */
  private static final String POS_MODEL_PATH = "/Files/en-pos-maxent.bin";

  /** Field the generated term query targets. */
  private String fieldName;

  /**
   * Creates a parser and resolves the target field.
   *
   * <p>The field is taken from the {@code field} local parameter, then from the
   * {@code df} request parameter, and finally from {@code defaultFieldName}.
   *
   * @param qstr             the raw query string
   * @param localParams      local parameters of the query; may be {@code null}
   * @param params           request-level parameters; may be {@code null}
   * @param req              the request being served
   * @param defaultFieldName field used when neither {@code field} nor {@code df} is set
   */
  public QueryParser(String qstr, SolrParams localParams, SolrParams params,
      SolrQueryRequest req,
      String defaultFieldName) {

    super(qstr, localParams, params, req);

    // Either parameter set may be absent, so guard before dereferencing.
    if (localParams != null) {
      fieldName = localParams.get("field");
    }
    if (fieldName == null && params != null) {
      fieldName = params.get("df");
    }
    if (fieldName == null) {
      fieldName = defaultFieldName;
    }
  }

  /**
   * Builds the term query from the adjective and noun tokens of the query string.
   *
   * @return a {@link TermQuery} on the resolved field whose text is the kept
   *         tokens joined by single spaces (empty when no token qualifies)
   * @throws SyntaxError if there is no query string or no target field
   * @throws IllegalStateException if an OpenNLP model file cannot be read
   */
  @Override
  public Query parse() throws SyntaxError {
    if (qstr == null) {
      throw new SyntaxError("Query string is missing");
    }
    if (fieldName == null) {
      throw new SyntaxError(
          "No target field: set the 'field' local param or the 'df' request param");
    }

    Tokenizer tokenizer = new TokenizerME(loadTokenizerModel());
    String[] tokens = tokenizer.tokenize(qstr);

    POSTaggerME posTagger = new POSTaggerME(loadPosModel());
    String[] tags = posTagger.tag(tokens);

    StringBuilder finalQuery = new StringBuilder();
    for (int i = 0; i < tokens.length; i++) {
      if (isKeptTag(tags[i])) {
        // Separate tokens with one space, without a leading separator.
        if (finalQuery.length() > 0) {
          finalQuery.append(' ');
        }
        finalQuery.append(tokens[i]);
      }
    }
    return new TermQuery(new Term(fieldName, finalQuery.toString()));
  }

  /** Returns whether a part-of-speech tag marks a token that is kept in the query. */
  private static boolean isKeptTag(String tag) {
    // Compare by value; reference comparison of strings is not reliable here.
    return "JJ".equals(tag) || "NNS".equals(tag) || "NN".equals(tag);
  }

  /** Reads the tokenizer model from disk, closing the stream afterwards. */
  private static TokenizerModel loadTokenizerModel() {
    try (InputStream in = new FileInputStream(TOKEN_MODEL_PATH)) {
      return new TokenizerModel(in);
    } catch (IOException e) {
      throw new IllegalStateException(
          "Unable to load OpenNLP tokenizer model from " + TOKEN_MODEL_PATH, e);
    }
  }

  /** Reads the part-of-speech model from disk, closing the stream afterwards. */
  private static POSModel loadPosModel() {
    try (InputStream in = new FileInputStream(POS_MODEL_PATH)) {
      return new POSModel(in);
    } catch (IOException e) {
      throw new IllegalStateException(
          "Unable to load OpenNLP part-of-speech model from " + POS_MODEL_PATH, e);
    }
  }
}

然后我将其导出为jar并将这些jar添加到我的solrconfig.xml -

<!-- Adds jars from contrib/customparser/lib to the core's classpath.
     NOTE(review): this regex only matches an upper-case ".JAR" suffix; confirm the exported jar's file name actually ends that way. -->
<lib dir="${solr.install.dir:../../../..}/contrib/customparser/lib" 
 regex=".*\.JAR" />
<!-- Adds jars whose file names start with "opennlp-" from contrib/analysis-extras/lib.
     NOTE(review): verify that this directory really contains the opennlp jars. -->
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" 
 regex="opennlp-.*\.jar" />

但是得到以下错误:

引起:

java.lang.NoClassDefFoundError: opennlp/tools/tokenize/Tokenizer
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.solr.core.SolrResourceLoader.findClass(SolrResourceLoader.java:541)
    at org.apache.solr.core.SolrResourceLoader.findClass(SolrResourceLoader.java:488)
    at org.apache.solr.core.SolrCore.createInstance(SolrCore.java:786)
    at org.apache.solr.core.PluginBag.createPlugin(PluginBag.java:135)
    at org.apache.solr.core.PluginBag.init(PluginBag.java:271)
    at org.apache.solr.core.PluginBag.init(PluginBag.java:260)
    at org.apache.solr.core.SolrCore.<init>(SolrCore.java:957)
    ... 9 more

这是我第一次创建CustomQueryParser,你能不能帮帮我。

谢谢

1 个答案:

答案 0 :(得分:1)

最有可能是你的路径

  

${solr.install.dir:../../../..}/contrib/analysis-extras/lib

不包含相关的opennlp jar或正则表达式不合适。 这是第一件要检查的事情。

你必须要么把 opennlp 依赖项“打包”进你的自定义查询解析器 jar 中(例如,如果你使用 maven 构建项目,可以使用 maven-assembly-plugin、maven-shade-plugin 等),要么确保 solrconfig.xml 中相关的 lib 指令能够匹配到 opennlp 相关的 jar。