Lucene的批量查询性能调优

时间:2015-12-05 04:17:59

标签: lucene

我目前针对从数据库表中读取的数据运行Lucene的查询。以下代码读取数据库表中的所有行,并在每次迭代中查询Lucene的索引。

处理 400,000 行数据大约需要 18 分钟。应该如何改进这段代码,以提高查询速度?

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Batch-matches rows of the {@code PRSN} database table against a Lucene index.
 *
 * <p>For every row, a query is built from the row's {@code FIRSTNAME} (required) and
 * {@code LOCATION} (required and boosted when present) and run against the index.
 * Hits scoring above {@link #MIN_SCORE} are printed to standard output.
 */
public class SearchLucene {
    /** Filesystem location of the Lucene index. */
    private static final String INDEX_PATH = "D:/indexDir";

    /** Maximum number of hits requested from the index for each row. */
    private static final int HITS_PER_PAGE = 10;

    /** Only hits with a score strictly greater than this value are printed. */
    private static final float MIN_SCORE = 5f;

    /** Boost applied to the LOCATION clause of the query. */
    private static final float LOCATION_BOOST = 3f;

    /**
     * Reads every row of {@code PRSN}, searches the index for it and prints the
     * high-scoring matches. Any I/O, SQL, parse or driver-loading failure is reported
     * via its stack trace and ends the run.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            Class.forName("net.sourceforge.jtds.jdbc.Driver");

            // try-with-resources closes everything, in reverse order, even on failure.
            try (Analyzer analyzer = new StandardAnalyzer();
                 Directory directory = FSDirectory.open(new File(INDEX_PATH));
                 IndexReader indexReader = DirectoryReader.open(directory);
                 Connection conn = DriverManager.
                         getConnection("jdbc:jtds:sqlserver://localhost:1433/alpha;instance","myuser","mypassword");
                 Statement stat = conn.createStatement();
                 // Only the two columns that are actually read are fetched.
                 ResultSet rs = stat.executeQuery("SELECT FIRSTNAME, LOCATION FROM PRSN")) {

                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                QueryParser firstNameParser = new QueryParser("FIRSTNAME", analyzer);
                QueryParser locationParser = new QueryParser("LOCATION", analyzer);

                while (rs.next()) {
                    String firstName = rs.getString("FIRSTNAME");
                    if (isBlank(firstName)) {
                        // Nothing to search for; parsing a null value would throw.
                        continue;
                    }
                    Query query = buildQuery(firstNameParser, locationParser,
                            firstName, rs.getString("LOCATION"));
                    printMatches(indexSearcher, query);
                }
            }
        } catch (IOException | ParseException | ClassNotFoundException | SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the per-row query: FIRSTNAME must match, and LOCATION must match
     * (boosted) when the row has one.
     *
     * <p>Values are escaped first because they are data, not query syntax; an
     * unescaped special character would otherwise raise a {@link ParseException}.
     */
    private static Query buildQuery(QueryParser firstNameParser, QueryParser locationParser,
                                    String firstName, String location) throws ParseException {
        BooleanQuery boolQuery = new BooleanQuery();
        boolQuery.add(firstNameParser.parse(QueryParser.escape(firstName)), Occur.MUST);

        if (!isBlank(location)) {
            Query locationQuery = locationParser.parse(QueryParser.escape(location));
            locationQuery.setBoost(LOCATION_BOOST);
            boolQuery.add(locationQuery, Occur.MUST);
        }
        return boolQuery;
    }

    /** Runs the query and prints FIRSTNAME, LOCATION and score of each hit above the threshold. */
    private static void printMatches(IndexSearcher indexSearcher, Query query) throws IOException {
        TopDocs docs = indexSearcher.search(query, HITS_PER_PAGE);

        for (ScoreDoc hit : docs.scoreDocs) {
            if (!(hit.score > MIN_SCORE)) {
                // Hits are ordered by descending score, so no later hit can pass either.
                break;
            }
            // Stored fields are loaded only for hits that will actually be printed.
            Document d = indexSearcher.doc(hit.doc);
            System.out.print(d.get("FIRSTNAME"));
            System.out.print(d.get("LOCATION"));
            System.out.println(hit.score);
        }
    }

    /** Returns true when the value is null or contains only whitespace. */
    private static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }
}

0 个答案:

没有答案