Hi everyone, I have a problem with my application that uses the Lucene Java library, and I cannot tell what the error actually is.
Here is a sample from the error console:
```
ERROR/AndroidRuntime(25909): java.lang.OutOfMemoryError: (heap size=32775KB, allocation=30112KB, bitmap size=0KB)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.FreqProxTermsWriterPerField$FreqProxPostingsArray.<init>(FreqProxTermsWriterPerField.java:193)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.FreqProxTermsWriterPerField$FreqProxPostingsArray.newInstance(FreqProxTermsWriterPerField.java:204)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.ParallelPostingsArray.grow(ParallelPostingsArray.java:48)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.TermsHashPerField.growParallelPostingsArray(TermsHashPerField.java:137)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.TermsHashPerField.add(TermsHashPerField.java:440)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:172)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.DocFieldProcessorPerThread.processDocument(DocFieldProcessorPerThread.java:278)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:766)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:2067)
ERROR/AndroidRuntime(25909):     at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:2041)
```
It points at the index writer's fields, but I don't know what that means.
Could you help me out? Thanks for any replies. Here is my code:
```java
public class CalculateWeightPage {

    protected static Crawlers crawlers;
    protected static StopWordsAndStemmer stemmer;
    protected static CountWords countWords;
    protected static StringSplitter splitter;
    protected static ShortingStringArray shortingStringArray;

    public static String[][] calulateRelevancePage(String[][] wkt, String urlPage) {
        // 1.1. Defining parameters
        int p = 0;
        int count = 0;
        int count2 = 0;
        String title = "";
        String body = "";
        int titleFreq = 0;
        int bodyFreq = 0;
        String[][] wkp = null;
        int newTf = 0;
        int y = 0;
        int counter = 0;
        try {
            // 1.2. Extracting the text body and title from the web page
            Map bodyTitle = crawlers.extractBodyAndTitle(urlPage);
            if (bodyTitle.containsKey("title")) {
                title = stemmer.removeStopWordsAndStem(((String) bodyTitle.get("title")).toLowerCase());
                body = stemmer.removeStopWordsAndStem(((String) bodyTitle.get("body")).toLowerCase());
                // 1.4. Making a list containing unique words from text title and body
                List bodyTitleUnique = splitter.StringUnique(body);
                int sizeList = bodyTitleUnique.size();
                wkp = new String[sizeList][2];
                // 1.5. Calculating each tf
                for (int r = 0; r < sizeList; r++) {
                    titleFreq = 0;
                    bodyFreq = 0;
                    // 1.5.1. Calculating tf in title
                    titleFreq = countWords.calculate(title, bodyTitleUnique.get(r).toString());
                    // 1.5.2. Calculating tf in body
                    bodyFreq = countWords.calculate(body, bodyTitleUnique.get(r).toString());
                    if (!(titleFreq == 0)) {
                        newTf = (titleFreq * 2) + (bodyFreq - titleFreq);
                    } else {
                        newTf = titleFreq + bodyFreq;
                    }
                    // 1.6. Inserting the result into the string array
                    if (!(newTf == 0)) {
                        wkp[r][0] = bodyTitleUnique.get(r).toString();
                        wkp[r][1] = String.valueOf(newTf);
                    }
                }
            } else {
                return wkp;
            }
        } catch (Exception e) {
            // TODO: handle exception
        }
        return wkp;
    }
}
```
And here is the second class:
```java
public class CountWords {

    CountWords() {
    }

    protected static StopWordsAndStemmer stemmer;

    public static int calculate(String txt, String keyword) {
        StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
        RAMDirectory idx = new RAMDirectory();
        int counts = 0;
        int count = 0;
        try {
            IndexWriter writer = new IndexWriter(idx, analyzer, true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            // String text1 = stemmer.removeStopWordsAndStem(txt.toLowerCase());
            writer.addDocument(createDocument("", txt));
            writer.optimize();
            writer.close();
            Searcher searcher = new IndexSearcher(idx);
            IndexReader ir = IndexReader.open(idx);
            TermDocs termDocs = ir.termDocs(new Term("content", keyword.toLowerCase()));
            while (termDocs.next()) {
                count = count + termDocs.freq();
            }
            // counts = count(count);
            searcher.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return count;
    }

    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("content", new StringReader(content)));
        return doc;
    }

    private static int search(Searcher searcher, String queryString) throws ParseException, IOException {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
        Query query = parser.parse(queryString);
        TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
        searcher.search(query, collector);
        return collector.getTotalHits();
    }

    public static Integer count(int count) {
        if (count == 0) {
            count = 1;
        } else {
            count = count;
        }
        return count;
    }
}
```
Answer (score: 0)
You are probably getting this error because all the data you need simply does not fit in memory.
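If you do stick with Lucene for the indexing itself, one thing worth trying is to cap the writer's in-memory buffer so that it flushes early instead of growing until the heap runs out. A minimal sketch against the same pre-4.0 `IndexWriter` API your code already uses (the 4 MB figure is an arbitrary assumption, not a tuned value):

```java
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

public class BoundedWriterSketch {

    // Opens an IndexWriter whose in-memory posting buffer is capped, so it
    // flushes to the directory instead of growing until the heap is exhausted.
    static IndexWriter openBoundedWriter(Directory dir) throws IOException {
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setRAMBufferSizeMB(4.0); // assumption: flush after roughly 4 MB of buffered postings
        return writer;
    }
}
```

Keep in mind that with a `RAMDirectory` the flushed segments still live on the heap, so this only smooths the writer's peak usage; an on-disk `FSDirectory` would actually move the index out of RAM.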
That said, your solution looks a bit over-engineered: you do not need Lucene just to compute term frequencies in memory (which is all that `CountWords.calculate` does); you can simply analyze the input yourself and store the counts in a `HashMap<String, Integer>`.
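A minimal sketch of that idea, assuming plain whitespace tokenization (hook in your own stop-word removal and stemming where the comment indicates):

```java
import java.util.HashMap;
import java.util.Map;

public class TermFrequencies {

    // One pass over the text: split it into tokens and count each one.
    public static Map<String, Integer> countTerms(String text) {
        Map<String, Integer> freqs = new HashMap<String, Integer>();
        // Naive whitespace tokenization; apply your stop-word removal
        // and stemming here instead if you need them.
        for (String token : text.toLowerCase().split("\\s+")) {
            if (token.length() == 0) {
                continue;
            }
            Integer current = freqs.get(token);
            freqs.put(token, current == null ? 1 : current + 1);
        }
        return freqs;
    }

    public static void main(String[] args) {
        Map<String, Integer> freqs =
                countTerms("the quick brown fox jumps over the lazy dog the");
        System.out.println(freqs.get("the")); // prints 3
    }
}
```

A single pass gives you the frequency of every term at once, so there is no `RAMDirectory`, `IndexWriter`, or `IndexReader` to create (and forget to close) for each keyword.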
Also, even though your code may work, a few things in it do not look right: