在使用其他令牌过滤器之前,我必须分析全文。关键是令牌流结束时,无法重置该流。以下代码引发异常:“违反TokenStream合同:reset()/ close()多次调用缺少的reset(),或者子类未调用super.reset()。有关TokenStream类的更多信息,请参见Javadocs。正确的消费工作流程”。
public class LemmatizerFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
private static ArrayList<Tag> tags;
public LemmatizerFilter(TokenStream input) {
super(input);
tags = new ArrayList<Tag>();
}
@Override
public synchronized boolean incrementToken() throws IOException {
assert tags != null;
if(tags.size() == 0) {
ArrayList<String> terms = new ArrayList<String>();
while(input.incrementToken()) {
terms.add(termAtt.toString());
}
TaggerSingleton tagger = TaggerSingleton.getInstance();
tags = tagger.tag(terms);
input.end();
input.close();
input.reset();
}
if(input.incrementToken()) { //this row raises the above exception
if (!keywordAttr.isKeyword()) {
...
}
return true;
} else {
return false;
}
}
public static ArrayList<Tag> getTags() {
return tags;
}
public static synchronized void setTags(ArrayList<Tag> tags) {
LemmatizerFilter.tags = tags;
}
}