I am trying to insert close to 30 million rows from Oracle into Cassandra.
Here is a small piece of code I put together quickly. I am getting java.sql.SQLException: Exhausted Resultset, and even when I make the DB read smaller (fetching only 500,000 records) it only manages to insert about 3,500,000 rows or fewer. That forces me to run the Cassandra inserts in a single thread, which takes close to 2.5 hours for the full 30 million.
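What I suspect is that pulling all 30 million rows into an unbounded in-memory queue before any insert starts is part of the problem. Below is a sketch of the bounded, streaming read I am considering instead; the fetch size, the queue capacity, and the POISON sentinel are my own guesses, and IHFactRecords / connection are the same class and connection used in my code further down:

    // Hypothetical replacement for my read loop: stream the ResultSet into a
    // bounded queue so the JVM never holds all 30M rows at once.
    private static final IHFactRecords POISON =
            new IHFactRecords("POISON", null, null, null, null, 0L, 0L); // sentinel, never inserted

    private void streamFromOracle(BlockingQueue<IHFactRecords> queue, String query, int consumers)
            throws SQLException, InterruptedException {
        java.sql.Statement stmt = connection.createStatement();
        stmt.setFetchSize(5000); // Oracle's JDBC default is only 10 rows per round trip
        ResultSet rs = stmt.executeQuery(query);
        try {
            while (rs.next()) {
                // put() blocks when the queue is full; offer() on a bounded queue
                // would silently drop rows instead
                queue.put(new IHFactRecords(rs.getString(1), rs.getString(2),
                        rs.getString(3), rs.getString(4),
                        Timestamp.valueOf(rs.getString(5)), rs.getLong(6), rs.getLong(7)));
            }
            // one poison pill per consumer thread, so a worker exits only when the
            // read is really finished, not whenever the queue is momentarily empty
            for (int i = 0; i < consumers; i++) {
                queue.put(POISON);
            }
        } finally {
            rs.close();
            stmt.close();
        }
    }

With that, the queue would be created as new LinkedBlockingQueue<IHFactRecords>(50000) and each worker would loop with while ((ihrecord = blockingQueue.take()) != POISON) instead of poll() != null.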
Here is my current code:
import java.math.BigInteger;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Session;
import com.pega.dbutils.DataBaseUtils;
import cassandraconnect.learncassandra.ConnectionToDB;
public class DumpMultiThreadedDumpInCassandra {
    Connection connection = null;
    Cluster cluster = null;
    Session session = null;
    // placeholders so the snippet compiles; the real values come from my job config
    static final String ipaddress = "127.0.0.1";
    static final long startrange = 0L;
    static final long endstartrange = 30000000L;

    public static void main(String[] args) throws SQLException {
        DumpMultiThreadedDumpInCassandra dumpInCassandra = new DumpMultiThreadedDumpInCassandra();
        dumpInCassandra.openConnectionWithCassandra();
        try {
            dumpInCassandra.getDataFromDbInToMemoryAndInsertInCassandra();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } finally {
            System.out.println("Closing the cassandra cluster");
            dumpInCassandra.session.close();
            dumpInCassandra.cluster.close();
            dumpInCassandra.connection.close();
        }
    }

    private void openConnectionWithCassandra() {
        cluster = Cluster.builder().addContactPoint(ipaddress).build();
        session = cluster.connect("data");
        System.out.println(session.getCluster().getClusterName());
    }
    private void getDataFromDbInToMemoryAndInsertInCassandra() throws SQLException, InterruptedException {
        String insertSQL = "insert into table(pysubjectid,pyissue,pygroup,pyname,pxoutcometime,price,sequence) values(?,?,?,?,?,?,?)";
        final com.datastax.driver.core.PreparedStatement preparedStatement = session.prepare(insertSQL);
        int threads = 3;
        connection = ConnectionToDB.openConnection();
        String query = "select SUBJECTID,ISSUE,GROUPISSUE,NAMEPROP,OUTCOMETIME,PRICE,factidint from oracletable"
                + " where factidint >= " + startrange + " and factidint <= " + endstartrange;
        // String query = "select SUBJECTID,ISSUE,GROUPISSUE,NAMEPROP,OUTCOMETIME,PRICE,factidint from oracletable";
        System.out.println("Query is >>> " + query);
        java.sql.Statement statement = connection.createStatement();
        final BlockingQueue<IHFactRecords> blockingQueue = new LinkedBlockingQueue<IHFactRecords>();
        final ResultSet resultSet = statement.executeQuery(query);
        long countprocessed = 0;
        // read the entire ResultSet into memory before any inserts start
        while (resultSet.next()) {
            blockingQueue.offer(new IHFactRecords(resultSet.getString(1), resultSet.getString(2),
                    resultSet.getString(3), resultSet.getString(4),
                    Timestamp.valueOf(resultSet.getString(5)), resultSet.getLong(6),
                    resultSet.getLong(7)));
            countprocessed++;
            if (countprocessed % 1000000 == 0) {
                System.out.println("million put in memory");
            }
        }
        DataBaseUtils.close(resultSet);
        DataBaseUtils.close(statement);
        /* Earlier attempt: read the ResultSet on a separate thread while the inserts run.
        ExecutorService readThreadservice = Executors.newFixedThreadPool(1);
        readThreadservice.submit(new Runnable() {
            public void run() {
                try {
                    if (resultSet != null) {
                        while (resultSet.next()) {
                            blockingQueue.offer(new IHFactRecords(resultSet.getString(1), resultSet.getString(2),
                                    resultSet.getString(3), resultSet.getString(4),
                                    Timestamp.valueOf(resultSet.getString(5)), resultSet.getLong(6),
                                    resultSet.getLong(7)));
                        }
                    }
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        });*/
System.out.println("Sleeping for 5 seconds");
Thread.sleep(5000);
System.out
.println("size of blocking queue populated in separated thread is now simultaneously starting ingestion"
+ ">>>" + blockingQueue.size());
long starttime = System.currentTimeMillis();
ExecutorService executorService = Executors.newFixedThreadPool(threads);
final AtomicInteger totalcountinserted = new AtomicInteger(0);
System.out.println("starting threads for parallel ingestion");
for (int i = 0; i < threads; i++) {
executorService.submit(new Runnable() {
public void run() {
// TODO Auto-generated method stub
IHFactRecords ihrecord;
while ((ihrecord = blockingQueue.poll()) != null) {
BoundStatement boundStatement = preparedStatement.bind();
boundStatement.setString(0, ihrecord.getSubjectID());
boundStatement.setString(1, ihrecord.getIssue());
boundStatement.setString(2, ihrecord.getGroup());
boundStatement.setString(3, ihrecord.getPropsition());
boundStatement.setTimestamp(4, ihrecord.getTime());
boundStatement.setVarint(5, BigInteger.valueOf(ihrecord.getPrice()));
boundStatement.setVarint(6, BigInteger.valueOf(ihrecord.getSequence()));
session.execute(boundStatement);
int createdtillnow = totalcountinserted.incrementAndGet();
if (createdtillnow % 100000 == 0)
System.out.println("Total records inserted till now are" + ">>" + createdtillnow);
}
}
});
}
        executorService.shutdown();
        // NOTE: if ingestion takes longer than 10 minutes, awaitTermination returns false
        // and the finally block in main() closes the session under the running workers
        executorService.awaitTermination(10, TimeUnit.MINUTES);
        //readThreadservice.shutdown();
        //readThreadservice.awaitTermination(2, TimeUnit.MINUTES);
        long endTime = System.currentTimeMillis();
        System.out.println("time in seconds >>> " + TimeUnit.MILLISECONDS.toSeconds(endTime - starttime));
        System.out.println("time in minutes >>> " + TimeUnit.MILLISECONDS.toMinutes(endTime - starttime));
        System.out.println("time in hrs >>> " + TimeUnit.MILLISECONDS.toHours(endTime - starttime));
    }
}
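On the throughput side, I am also wondering whether asynchronous inserts with a cap on in-flight requests would beat three threads doing a blocking session.execute() each. Here is a sketch of what I mean; the 512 in-flight limit is a guess, Futures/FutureCallback come from Guava (already on the classpath with the DataStax driver), and preparedStatement/session are the same objects as above:

    // Sketch: throttled asynchronous inserts. Needs java.util.concurrent.Semaphore,
    // com.datastax.driver.core.ResultSetFuture, and
    // com.google.common.util.concurrent.{Futures,FutureCallback}.
    final Semaphore inFlight = new Semaphore(512); // max outstanding requests (a guess)

    void insertAsync(IHFactRecords ihrecord) throws InterruptedException {
        inFlight.acquire(); // block while too many requests are outstanding
        BoundStatement bound = preparedStatement.bind(
                ihrecord.getSubjectID(), ihrecord.getIssue(), ihrecord.getGroup(),
                ihrecord.getPropsition(), ihrecord.getTime(),
                BigInteger.valueOf(ihrecord.getPrice()),
                BigInteger.valueOf(ihrecord.getSequence()));
        ResultSetFuture future = session.executeAsync(bound);
        Futures.addCallback(future, new FutureCallback<com.datastax.driver.core.ResultSet>() {
            public void onSuccess(com.datastax.driver.core.ResultSet rs) {
                inFlight.release();
            }
            public void onFailure(Throwable t) {
                inFlight.release();
                t.printStackTrace(); // a real job would retry or log the failed row
            }
        });
    }

Even a single thread calling insertAsync in a loop keeps hundreds of requests in flight, which is usually where the driver gets its throughput from rather than from more caller threads.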