如何使用SQL获得更快的更新Solr索引

时间:2019-03-26 10:59:21

标签: java sql solrj

我有一个Access数据库,其中有173000+行。我想使用ContentStreamupdateRequest在我的Solr核心中为它们建立索引,但是建立索引需要很长时间,而我的项目的重点是速度。我怎样才能更快地完成?有什么建议吗?我的密码

package org.solr;

import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

import static org.junit.Assert.assertNotNull;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.format.DateTimeFormatter;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.UUID;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.ContentStreamBase.ByteArrayStream;
import org.apache.solr.handler.dataimport.DateFormatTransformer;
import org.apache.solr.schema.DateValueFieldType;


public class dbJava {

    public static void main(String Args[]) throws SolrServerException, IOException {
        Statement state;
        Connection connect;
        int count=0;
        String allResults = null;
        try {
            connect = DriverManager.getConnection("jdbc:ucanaccess://C:\\Users\\FTK1187\\Desktop/eArchiveMaster.mdb","FTEC198","");
            state = connect.createStatement();  
            ResultSet rs = state.executeQuery("SELECT * FROM TableArchive ORDER BY ID" );

            //System.out.println(count);
            String urlString="http://localhost:8983/solr/archiveCore";
            HttpSolrClient solr=new HttpSolrClient.Builder(urlString).build();
            ((HttpSolrClient) solr).setParser(new XMLResponseParser());
            //SolrInputDocument document=new SolrInputDocument();
            final String contentStream = "extract this text ...";
            final String contentType = "text/csv;charset=UTF-8";
            final ByteArrayStream byteArrayStream = new ByteArrayStream(contentStream.getBytes(StandardCharsets.UTF_8), null);
            byteArrayStream.setContentType(contentType);
            final ModifiableSolrParams modifiableSolrParams = new ModifiableSolrParams();
            modifiableSolrParams.add(CollectionAdminParams.COLLECTION, "archiveCore");
            ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract"); 
            req.addContentStream(byteArrayStream);
            req.setParams(modifiableSolrParams);
            req.setMethod(METHOD.POST);
            solr.deleteByQuery("*");
            solr.commit();
            //req.addFile(new File("C:\\Users\\FTK1187\\Desktop\\E-Archive - Copy\\TableArchive.csv"),"text/csv");
            while(rs.next()){
                count++;
                req.setParam("literal.id", String.valueOf(rs.getInt("ID")));
                req.setParam("literal.NameAdded", String.valueOf(rs.getString("NameAdded")));
                req.setParam("literal.DateAdded", String.valueOf(rs.getString("DateAdded")));
                req.setParam("literal.NameModified", String.valueOf(rs.getString("NameModified")));
                req.setParam("literal.DateModified", String.valueOf(rs.getString("DateModified")));
                req.setParam("literal.strSO", String.valueOf(rs.getString("strSO")));
                req.setParam("literal.strCust", String.valueOf(rs.getString("strCust")));
                req.setParam("literal.strOperator", String.valueOf(rs.getString("strOperator")));
                req.setParam("literal.PackName", String.valueOf(rs.getString("PackName")));
                req.setParam("literal.DocName", String.valueOf(rs.getString("DocName")));
                req.setParam("literal.DocType", String.valueOf(rs.getString("DocType")));
                req.setParam("literal.extType", String.valueOf(rs.getString("extType")));
                req.setParam("literal.FileName", String.valueOf(rs.getString("FileName")));
                req.setParam("literal.FilePath", String.valueOf(rs.getString("FilePath")));
                req.setParam("literal.NameDeleted", String.valueOf(rs.getString("NameDeleted")));
                req.setParam("literal.DateDeleted", String.valueOf(rs.getString("DateDeleted")));
                req.setParam("literal.intRev", String.valueOf(rs.getString("intRev")));
                req.setAction(ACTION.COMMIT, true, true);
                solr.request(req);
            }


            System.out.println("Done...");
        } 
        catch(IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

    }

}

我应该使用什么;是ContentStreamUpdateRequest还是SolrInputDocument?

0 个答案:

没有答案