Inserting into Cassandra in parallel (using multiple threads) while reading from Oracle DB

Asked: 2016-08-22 13:50:25

Tags: java multithreading oracle cassandra

I am trying to insert close to 30 million rows from Oracle into Cassandra.

Here is a small piece of code I quickly put together. I am getting java.sql.SQLException: Exhausted Resultset. Even when I make the DB read smaller (taking only 500000 records), it only manages to insert close to 3500000 or fewer rows, which forces me to run the Cassandra inserts in a single thread, and that takes close to 2.5 hours for the 30 million rows.

Here is the code:

import java.math.BigInteger;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Session;
import com.pega.dbutils.DataBaseUtils;

import cassandraconnect.learncassandra.ConnectionToDB;

public class DumpMultiThreadedDumpInCassandra {

    Connection connection = null;
    Cluster cluster = null;
    Session session = null;


    public static void main(String[] args) throws SQLException {
        // TODO Auto-generated method stub

        DumpMultiThreadedDumpInCassandra dumpInCassandra = new DumpMultiThreadedDumpInCassandra();
        dumpInCassandra.openConnectionWithCassadnra();
        try {
            dumpInCassandra.getDataFromDbInToMemoryAndInsertInCassandra();
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        finally {
            System.out.println("Closing the cassandra cluster");
            dumpInCassandra.session.close();
            dumpInCassandra.cluster.close();
            dumpInCassandra.connection.close();
        }

    }

    private void openConnectionWithCassadnra() {

        cluster = Cluster.builder().addContactPoint(ipaddress).build();
        session = cluster.connect("data");
        System.out.println(session.getCluster().getClusterName());

    }

    private void getDataFromDbInToMemoryAndInsertInCassandra() throws SQLException, InterruptedException {
        String insertSQL = "insert into table(pysubjectid,pyissue,pygroup,pyname,pxoutcometime,price,sequence) values(?,?,?,?,?,?,?)";
        final com.datastax.driver.core.PreparedStatement preparedStatement = session.prepare(insertSQL);

        int threads = 3;

        connection = ConnectionToDB.openConnection();

        String Query = "select SUBJECTID,ISSUE,GROUPISSUE,NAMEPROP,OUTCOMETIME,PRICE,factidint from oracletable where factidint>="+startrange +" "+"and"+" "+ "factidint<="+endstartrange;
        //String Query = "select SUBJECTID,ISSUE,GROUPISSUE,NAMEPROP,OUTCOMETIME,PRICE,factidint from oracletable ";
        System.out.println("Query is"+">>>"+Query);

        java.sql.Statement statement = connection.createStatement();

        final BlockingQueue<IHFactRecords> blockingQueue = new LinkedBlockingQueue<IHFactRecords>();

        final ResultSet resultSet = statement.executeQuery(Query);

        long countprocessed = 0;
        // Drain the entire ResultSet into the in-memory queue before starting any inserts.
        while (resultSet.next()) {
            blockingQueue.offer(new IHFactRecords(resultSet.getString(1), resultSet.getString(2),
                    resultSet.getString(3), resultSet.getString(4),
                    Timestamp.valueOf(resultSet.getString(5)), resultSet.getLong(6),
                    resultSet.getLong(7)));
            countprocessed++;
            if (countprocessed % 1000000 == 0) {
                System.out.println("million put in memory");
            }
        }
        DataBaseUtils.close(resultSet);
        DataBaseUtils.close(statement);

        // Earlier attempt (left commented out): read the ResultSet in a separate thread while the inserts run.
        /*ExecutorService readThreadservice = Executors.newFixedThreadPool(1);
        readThreadservice.submit(new Runnable() {

            public void run() {
                // TODO Auto-generated method stub
                try {
                    if (resultSet!= null){
                    while (resultSet.next()) {
                        blockingQueue.offer(new IHFactRecords(resultSet.getString(1), resultSet.getString(2),
                                resultSet.getString(3), resultSet.getString(4),
                                Timestamp.valueOf(resultSet.getString(5)), resultSet.getLong(6), resultSet.getLong(7)));

                    }}
                } catch (SQLException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        });*/

        System.out.println("Sleeping for 5 seconds");
        Thread.sleep(5000);
        System.out
                .println("size of blocking queue populated in separated thread is now simultaneously starting ingestion"
                        + ">>>" + blockingQueue.size());
        long starttime = System.currentTimeMillis();
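        // Consumer threads: each one drains the in-memory queue and writes the records to Cassandra.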
        ExecutorService executorService = Executors.newFixedThreadPool(threads);
        final AtomicInteger totalcountinserted = new AtomicInteger(0);
        System.out.println("starting threads for parallel ingestion");
        for (int i = 0; i < threads; i++) {
            executorService.submit(new Runnable() {

                public void run() {
                    // TODO Auto-generated method stub
                    IHFactRecords ihrecord;
                    while ((ihrecord = blockingQueue.poll()) != null) {
                        BoundStatement boundStatement = preparedStatement.bind();

                        boundStatement.setString(0, ihrecord.getSubjectID());
                        boundStatement.setString(1, ihrecord.getIssue());
                        boundStatement.setString(2, ihrecord.getGroup());

                        boundStatement.setString(3, ihrecord.getPropsition());

                        boundStatement.setTimestamp(4, ihrecord.getTime());
                        boundStatement.setVarint(5, BigInteger.valueOf(ihrecord.getPrice()));
                        boundStatement.setVarint(6, BigInteger.valueOf(ihrecord.getSequence()));

                        session.execute(boundStatement);

                        int createdtillnow = totalcountinserted.incrementAndGet();
                        if (createdtillnow % 100000 == 0)
                            System.out.println("Total records inserted till now are" + ">>" + createdtillnow);
                    }
                }
            });
        }

        executorService.shutdown();

        executorService.awaitTermination(10, TimeUnit.MINUTES);

        //readThreadservice.shutdown();
        //readThreadservice.awaitTermination(2, TimeUnit.MINUTES);
        long endTime = System.currentTimeMillis();

        System.out.println("time in seconds" + ">>>" + TimeUnit.MILLISECONDS.toSeconds(endTime - starttime));
        System.out.println("time in minutes" + ">>>" + TimeUnit.MILLISECONDS.toMinutes(endTime - starttime));
        System.out.println("time in hrs" + ">>>" + TimeUnit.MILLISECONDS.toHours(endTime - starttime));
        DataBaseUtils.close(resultSet);
        DataBaseUtils.close(statement);

    }

}
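
For illustration, here is a minimal sketch of a bounded producer/consumer variant: one reader thread fills a bounded queue while the insert threads drain it, and a poison-pill sentinel tells the consumers when reading is finished, so the full table never has to sit in memory. The IHFactRecords class, the Cassandra session and the prepared statement are assumed to be the same as in the code above; the class name ProducerConsumerSketch and its run(...) method are made up for this sketch, and it has not been tested against the real schema:

import java.math.BigInteger;
import java.sql.ResultSet;
import java.sql.Timestamp;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

// Sketch only: bounded producer/consumer pipeline. IHFactRecords and the
// Cassandra session/prepared statement are assumed to be the same as above.
public class ProducerConsumerSketch {

    public void run(final java.sql.Connection connection,
                    final com.datastax.driver.core.Session session,
                    final com.datastax.driver.core.PreparedStatement preparedStatement,
                    final String query, final int threads) throws InterruptedException {

        // Bounded queue keeps memory usage flat instead of loading all rows first.
        final BlockingQueue<IHFactRecords> queue = new LinkedBlockingQueue<IHFactRecords>(100000);
        // Sentinel object: one copy per consumer is queued after the last row is read.
        final IHFactRecords POISON = new IHFactRecords(null, null, null, null, null, 0L, 0L);

        ExecutorService reader = Executors.newSingleThreadExecutor();
        reader.submit(new Runnable() {
            public void run() {
                try {
                    java.sql.Statement st = connection.createStatement();
                    ResultSet rs = st.executeQuery(query);
                    while (rs.next()) {
                        // put(...) blocks when the queue is full, applying back-pressure.
                        queue.put(new IHFactRecords(rs.getString(1), rs.getString(2),
                                rs.getString(3), rs.getString(4),
                                Timestamp.valueOf(rs.getString(5)),
                                rs.getLong(6), rs.getLong(7)));
                    }
                    rs.close();
                    st.close();
                    for (int i = 0; i < threads; i++) {
                        queue.put(POISON);   // tell every consumer the reader is done
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        });

        ExecutorService writers = Executors.newFixedThreadPool(threads);
        for (int i = 0; i < threads; i++) {
            writers.submit(new Runnable() {
                public void run() {
                    try {
                        IHFactRecords rec;
                        while ((rec = queue.take()) != POISON) {
                            // Same column order as the insert statement above.
                            session.execute(preparedStatement.bind(
                                    rec.getSubjectID(), rec.getIssue(), rec.getGroup(),
                                    rec.getPropsition(), rec.getTime(),
                                    BigInteger.valueOf(rec.getPrice()),
                                    BigInteger.valueOf(rec.getSequence())));
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                }
            });
        }

        reader.shutdown();
        writers.shutdown();
        writers.awaitTermination(4, TimeUnit.HOURS);
    }
}

The queue capacity of 100000 is an arbitrary choice; the point of bounding it is that the reader blocks on put(...) whenever the insert threads fall behind, so memory stays roughly constant regardless of how many rows the query returns.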

0 Answers:

No answers yet