我创建了一个自定义分词器(tokenizer)。通过 admin/analysis.jsp 和 System.out 日志检查时,它似乎工作正常。但是,当我对使用此自定义分词器的字段执行查询时,我发现(通过 System.out 日志确认)Solr 只在第一个查询字符串时调用了自定义分词器工厂的 create 方法。你能帮我指出我哪里做错了吗?以下是我的代码:
package com.fosp.searchengine;
import java.io.Reader;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.solr.analysis.WhitespaceTokenizerFactory;
/**
 * Tokenizer factory that runs the incoming text through
 * {@link ProcessedStringReader} before handing it to a whitespace tokenizer.
 *
 * <p>NOTE(review): the discussion accompanying this code reports that
 * {@code create} is only observed on the first query because the tokenizer
 * instances produced by the factory are reused. If that is the case, later
 * inputs would not pass through {@code ProcessedStringReader} - confirm
 * against the Solr/Lucene version in use.
 */
public class JvnTextProTokenizerFactory extends WhitespaceTokenizerFactory {

    /**
     * Builds a whitespace tokenizer over the pre-processed form of {@code input}.
     *
     * @param input the raw character stream to tokenize
     * @return a tokenizer reading from a {@code ProcessedStringReader} wrapping {@code input}
     */
    @Override
    public WhitespaceTokenizer create(Reader input) {
        // Trace output used to observe how often the factory is invoked.
        System.out.println("WhitespaceTokenizer create(Reader input)");
        return new WhitespaceTokenizer(new ProcessedStringReader(input));
    }
}
package com.fosp.searchengine;
import java.io.IOException;
import java.io.Reader;
/**
 * A {@link java.io.Reader} that eagerly drains another reader, passes the
 * complete text through a shared {@code TextProcess} instance, and then
 * serves the processed characters from an in-memory buffer.
 *
 * <p>NOTE(review): the shared {@code TextProcess} is created lazily without
 * any synchronization; whether that is safe depends on how instances are
 * constructed by the caller - confirm.
 */
public class ProcessedStringReader extends java.io.Reader {
    private static final int BUFFER_SIZE = 1024 * 8;
    /** Shared processor, created on first use and reused by every instance. */
    private static TextProcess m_textProcess = null;
    /** Processed characters; set to {@code null} once the reader is closed. */
    private char[] m_inputData = null;
    /** Index in {@code m_inputData} of the next character to hand out. */
    private int m_offset = 0;
    /** Number of valid characters in {@code m_inputData}. */
    private int m_length = 0;

    /**
     * Reads {@code input} to exhaustion and stores the processed result.
     *
     * <p>An {@link IOException} while draining {@code input} is reported on
     * standard error and whatever was read up to that point is processed
     * (best-effort, as before).
     *
     * @param input the source of raw text; it is read but not closed here
     * @throws IllegalStateException if the shared {@code TextProcess} cannot be created
     */
    public ProcessedStringReader(Reader input) {
        char[] chunk = new char[BUFFER_SIZE];
        StringBuilder raw = new StringBuilder();
        int numChars;
        try {
            while ((numChars = input.read(chunk, 0, chunk.length)) > 0) {
                raw.append(chunk, 0, numChars);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (m_textProcess == null) {
            try {
                m_textProcess = new TextProcess();
            } catch (IOException e) {
                // Previously this was swallowed and surfaced as a bare
                // NullPointerException on the next line; fail with the real cause.
                throw new IllegalStateException("Unable to create TextProcess", e);
            }
        }
        m_inputData = m_textProcess.processText(raw.toString()).toCharArray();
        m_offset = 0;
        m_length = m_inputData.length;
    }

    /**
     * Copies up to {@code len} processed characters into {@code cbuf},
     * starting at index {@code off} of {@code cbuf}.
     *
     * @param cbuf destination buffer
     * @param off  index in {@code cbuf} at which to start storing characters
     * @param len  maximum number of characters to copy
     * @return the number of characters copied, {@code 0} if {@code len} is zero,
     *         or {@code -1} if no characters remain (including after {@link #close()})
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit {@code cbuf}
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int remaining = m_length - m_offset;
        if (remaining <= 0) {
            return -1;
        }
        int count = Math.min(len, remaining);
        // 'off' positions the write in the destination only; the source
        // position is tracked solely by m_offset.
        System.arraycopy(m_inputData, m_offset, cbuf, off, count);
        m_offset += count;
        return count;
    }

    /**
     * Releases the buffered characters. Subsequent reads report end of stream.
     */
    @Override
    public void close() throws IOException {
        m_inputData = null;
        m_offset = 0;
        m_length = 0;
    }
}
schema.xml 中的配置:
<!-- Text field type whose index-time and query-time analyzers are configured
     identically: the custom JvnTextProTokenizerFactory followed by
     solr.LowerCaseFilterFactory. -->
<fieldType name="text_jvnTextPro" class="solr.TextField" positionIncrementGap="100">
<!-- Analyzer applied when documents are indexed. -->
<analyzer type="index">
<tokenizer class="com.fosp.searchengine.JvnTextProTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<!-- Analyzer applied to query strings; same chain as the index analyzer. -->
<analyzer type="query">
<tokenizer class="com.fosp.searchengine.JvnTextProTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
答案 0 :(得分:0)
这里没有错。由工厂实例化的分词器对象会被重用,因此 create 方法不会在每次查询时都被调用。这与 admin/analysis 页面的行为不同,区别就在于此。