I am writing a Java application that reads data from MySQL and stores it in Cassandra, since Sqoop does not support importing directly into Cassandra. Because MySQL holds a large number of records (in the millions), I am using a Producer-Consumer framework for this. However, I am getting a ReadTimeout exception (com.datastax.driver.core.exceptions.DriverException: Timeout during read). I have a Producer class that reads data from MySQL and puts it into a queue, a Consumer class that reads from that queue and pushes the data to Cassandra, and a Manager class that acts as a coordinating bridge between the two.

Producer class:
public class MySQLPrintJobProducer implements Runnable {

    private BlockingQueue<PrintJobDAO> printerJobQueue = null;
    private Connection conn = null;

    public MySQLPrintJobProducer(BlockingQueue<PrintJobDAO> printerJobQueue) throws MySQLClientException {
        this.printerJobQueue = printerJobQueue;
        connect();
    }

    private void connect() throws MySQLClientException {
        try {
            Class.forName(MySQLClientConstants.MYSQL_JDBC_DRIVER);
            conn = DriverManager.getConnection("jdbc:mysql://mysqlserverhose/mysqldb?user=mysqluser&password=mysqlpasswd");
        } catch (ClassNotFoundException e) {
            throw new MySQLClientException(ExceptionUtils.getStackTrace(e));
        } catch (SQLException e) {
            throw new MySQLClientException(ExceptionUtils.getStackTrace(e));
        }
    }

    public void run() {
        ResultSet rs = null;
        Statement stmt = null;
        PreparedStatement pStmt = null;
        try {
            stmt = conn.createStatement();
            // Get the total number of print jobs stored.
            rs = stmt.executeQuery(MySQLClientConstants.PRINT_JOB_COUNT_QUERY);
            int totalPrintJobs = 0;
            if(rs != null) {
                while(rs.next()) {
                    totalPrintJobs = rs.getInt(1);
                }
            }
            // Determine the number of iterations.
            int rowOffset = 1;
            int totalIteration = ((totalPrintJobs / ExportManagerConstants.DATA_TRANSFER_BATCH_SIZE) + 1);
            pStmt = conn.prepareStatement(MySQLClientConstants.PRINT_JOB_FETCH_QUERY);
            int totalRecordsFetched = 0;
            // Iterate to fetch print job records in batches and put them into the queue.
            for(int i = 1; i <= totalIteration; i++) {
                pStmt.setInt(1, rowOffset);
                pStmt.setInt(2, ExportManagerConstants.DATA_TRANSFER_BATCH_SIZE);
                System.out.println("In iteration : " + i + ", Row Offset : " + rowOffset);
                rs = pStmt.executeQuery();
                synchronized (this.printerJobQueue) {
                    if(this.printerJobQueue.remainingCapacity() > 0) {
                        while(rs.next()) {
                            totalRecordsFetched = rs.getRow();
                            printerJobQueue.offer(new PrintJobDAO(rs.getInt(1), rs.getInt(2), rs.getString(3), rs.getDate(4),
                                    rs.getTimestamp(5), rs.getInt(6), rs.getInt(7), rs.getInt(8), rs.getInt(9),
                                    rs.getInt(10), rs.getFloat(11), rs.getFloat(12), rs.getInt(13), rs.getFloat(14), rs.getInt(15),
                                    rs.getDouble(16), rs.getDouble(17), rs.getDouble(18), rs.getDouble(19), rs.getDouble(20),
                                    rs.getFloat(21)));
                            this.printerJobQueue.notifyAll();
                        }
                        System.out.println("In iteration : " + i + ", Records Fetched : " + totalRecordsFetched +
                                ", Queue Size : " + printerJobQueue.size());
                        rowOffset += ExportManagerConstants.DATA_TRANSFER_BATCH_SIZE;
                    } else {
                        System.out.println("Print Job Queue is full, waiting for Consumer thread to clear.");
                        this.printerJobQueue.wait();
                    }
                }
            }
        } catch (SQLException e) {
            System.err.println(ExceptionUtils.getStackTrace(e));
        } catch (InterruptedException e) {
            System.err.println(ExceptionUtils.getStackTrace(e));
        } finally {
            try {
                if(null != rs) {
                    rs.close();
                }
                if(null != stmt) {
                    stmt.close();
                }
                if(null != pStmt) {
                    pStmt.close();
                }
            } catch (SQLException e) {
                System.err.println(ExceptionUtils.getStackTrace(e));
            }
        }
        ExportManager.setProducerCompleted(true);
    }
}
Consumer class:
public class CassandraPrintJobConsumer implements Runnable {

    private Cluster cluster = null;
    private Session session = null;
    private BlockingQueue<PrintJobDAO> printerJobQueue = null;

    public CassandraPrintJobConsumer(BlockingQueue<PrintJobDAO> printerJobQueue) throws CassandraClientException {
        this.printerJobQueue = printerJobQueue;
        cluster = Cluster.builder().withPort(9042).addContactPoint("http://cassandrahost").build();
    }

    public void run() {
        int printJobConsumed = 0;
        int batchInsertCount = 1;
        if(cluster.isClosed()) {
            connect();
        }
        session = cluster.connect();
        PreparedStatement ps = session.prepare(CassandraClientConstants.INSERT_PRINT_JOB_DATA);
        BatchStatement batch = new BatchStatement();
        synchronized (this.printerJobQueue) {
            while(true) {
                if(!this.printerJobQueue.isEmpty()) {
                    for(int i = 1; i <= ExportManagerConstants.DATA_TRANSFER_BATCH_SIZE; i++) {
                        PrintJobDAO printJob = printerJobQueue.poll();
                        batch.add(ps.bind(printJob.getJobID(), printJob.getUserID(), printJob.getType(), printJob.getGpDate(), printJob.getDateTimes(),
                                printJob.getAppName(), printJob.getPrintedPages(), printJob.getSavedPages(), printJob.getPrinterID(), printJob.getWorkstationID(),
                                printJob.getPrintedCost(), printJob.getSavedCost(), printJob.getSourcePrinterID(), printJob.getSourcePrinterPrintedCost(),
                                printJob.getJcID(), printJob.getCoverageC(), printJob.getCoverageM(), printJob.getCoverageY(), printJob.getCoverageK(),
                                printJob.getCoverageTotal(), printJob.getPagesAnalyzed()));
                        printJobConsumed++;
                    }
                    session.execute(batch);
                    System.out.println("After Batch - " + batchInsertCount + ", record insert count : " + printJobConsumed);
                    batchInsertCount++;
                    this.printerJobQueue.notifyAll();
                } else {
                    System.out.println("Print Job Queue is empty, nothing to export.");
                    try {
                        this.printerJobQueue.wait();
                    } catch (InterruptedException e) {
                        System.err.println(ExceptionUtils.getStackTrace(e));
                    }
                }
                if(ExportManager.isProducerCompleted() && this.printerJobQueue.isEmpty()) {
                    break;
                }
            }
        }
    }
}
Manager class:
public class ExportManager {

    private static boolean isInitalized = false;
    private static boolean producerCompleted = false;
    private static MySQLPrintJobProducer printJobProducer = null;
    private static CassandraPrintJobConsumer printJobConsumer = null;
    private static BlockingQueue<PrintJobDAO> printJobQueue = null;

    public static boolean isProducerCompleted() {
        return producerCompleted;
    }

    public static void setProducerCompleted(boolean producerCompleted) {
        ExportManager.producerCompleted = producerCompleted;
    }

    private static void init() throws MySQLClientException, CassandraClientException {
        if(!isInitalized) {
            printJobQueue = new LinkedBlockingQueue<PrintJobDAO>(ExportManagerConstants.DATA_TRANSFER_BATCH_SIZE * 2);
            printJobProducer = new MySQLPrintJobProducer(printJobQueue);
            printJobConsumer = new CassandraPrintJobConsumer(printJobQueue);
            isInitalized = true;
        }
    }

    public static void exportPrintJobs() throws ExportException {
        try {
            init();
        } catch (MySQLClientException e) {
            throw new ExportException("Print Job Export failed.", e);
        } catch (CassandraClientException e) {
            throw new ExportException("Print Job Export failed.", e);
        }
        Thread producerThread = new Thread(printJobProducer);
        Thread consumerThread = new Thread(printJobConsumer);
        consumerThread.start();
        producerThread.start();
    }
}
TestNG class:
public class TestExportManager {

    @Test
    public void testExportPrintJobs() {
        try {
            ExportManager.exportPrintJobs();
            Thread.currentThread().join();
        } catch (ExportException e) {
            Assert.fail("ExportManager.exportPrintJobs() failed.", e);
        } catch (InterruptedException e) {
            Assert.fail("ExportManager.exportPrintJobs() failed.", e);
        }
    }
}
I have also made some configuration changes following this link. However, I still get the exception after 18000-20000 records have been inserted.
Exception in thread "Thread-2" com.datastax.driver.core.exceptions.NoHostAvailableException: All host(s) tried for query failed (tried: /192.168.10.80 (com.datastax.driver.core.exceptions.DriverException: Timeout during read))
        at com.datastax.driver.core.exceptions.NoHostAvailableException.copy(NoHostAvailableException.java:64)
        at com.datastax.driver.core.DefaultResultSetFuture.extractCauseFromExecutionException(DefaultResultSetFuture.java:256)
        at com.datastax.driver.core.DefaultResultSetFuture.getUninterruptibly(DefaultResultSetFuture.java:172)
        at com.datastax.driver.core.SessionManager.execute(SessionManager.java:91)
        at com.incendiary.ga.client.cassandra.CassandraPrintJobConsumer.run(CassandraPrintJobConsumer.java:108)
        at java.lang.Thread.run(Unknown Source)
Caused by: com.datastax.driver.core.exceptions.NoHostAvailableException: All host(s) tried for query failed (tried: /192.168.10.80 (com.datastax.driver.core.exceptions.DriverException: Timeout during read))
        at com.datastax.driver.core.RequestHandler.sendRequest(RequestHandler.java:100)
        at com.datastax.driver.core.RequestHandler$1.run(RequestHandler.java:171)
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        ... 1 more
I am not able to figure out the actual cause of the problem, and I cannot find any exception in the Cassandra system log. I am using Apache Cassandra 2.0.7 and cassandra-driver-core 2.0.1.
Answer 0 (score: 0):
You can increase the read timeout on the driver side. When building the Cluster, use the withSocketOptions method together with the SocketOptions class to set the read timeout. By default, the read timeout is 12 seconds (12000 milliseconds).
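A minimal sketch of what that could look like with the 2.0 driver follows; the contact point, port, and the 60-second timeout are illustrative assumptions, not values taken from the question:

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.SocketOptions;

public class ClusterFactory {

    public static Cluster buildCluster() {
        // Raise the driver-side read timeout above the 12000 ms default so
        // large batch writes get more time before the client gives up.
        // 60000 ms is an arbitrary example value; tune it to your workload.
        SocketOptions socketOptions = new SocketOptions()
                .setReadTimeoutMillis(60000);

        return Cluster.builder()
                .addContactPoint("cassandrahost") // host name or IP only, no "http://" scheme
                .withPort(9042)
                .withSocketOptions(socketOptions)
                .build();
    }
}

SocketOptions is supplied to the builder before build() is called, so the timeout applies to every connection the resulting Cluster opens.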