我的MySQL表中有150万条记录。我想分批读取所有记录,即计划每批读取1000条记录,并将这些记录打印到控制台。
为此,我计划使用Java的多线程来实现。我该如何实现呢?
答案 0 :(得分:1)
在MySQL中,您可以一次性获取所有记录,也可以以流式方式逐条获取这些记录(请参阅this answer)。或者,您可以使用limit
关键字进行分块读取(请参阅this answer)。
无论使用流式传输结果还是分块,您都可以在读取数据的同时使用多线程处理(或打印)数据。这通常使用生产者-消费者模式来完成:在这种情况下,生产者从数据库中检索数据并将其放入队列,消费者从队列中取出数据并对其进行处理(例如,打印到控制台)。
不过这会带来一些管理开销:生产者和消费者都可能卡住或因错误而中断,并且双方都需要能够察觉到这一点,以免永远挂起(从而可能冻结您的应用程序)。这正是"合理的"超时发挥作用的地方("合理"与否完全取决于您的具体情况)。
我尝试把它写成一个最小的可运行示例,但代码量仍然不少(见下文)。其中有两行被注释掉的代码可用于测试超时情况。还有一个refreshTestData
变量可用于重复使用已插入的记录(插入记录可能需要很长时间)。
为了保持代码简洁,省略了许多关键字,例如private/public
(也就是说,在非演示代码中需要添加这些关键字)。
import java.sql.*;
import java.util.*;
import java.util.concurrent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FetchRows {
private static final Logger log = LoggerFactory.getLogger(FetchRows.class);
/**
 * Entry point: runs the fetch-and-print demo once.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    try {
        new FetchRows().print();
    } catch (Exception e) {
        // Top-level boundary: report through the class logger (with stack trace)
        // so the failure ends up in the same place as all other output of this class.
        log.error("Fetching and printing records failed.", e);
    }
}
/**
 * Opens a connection to the local MySQL "test" database, prepares the test
 * table and then reads all records back, handing them to the print task.
 *
 * @throws Exception if the driver class cannot be loaded, on any database
 *         error, or when the thread is interrupted while records are handed over
 */
void print() throws Exception {
    // Loading the class is sufficient: a JDBC driver registers itself with
    // DriverManager when its class is initialized, so no instance is needed
    // (and the deprecated Class.newInstance() is avoided).
    Class.forName("com.mysql.jdbc.Driver");
    Properties dbProps = new Properties();
    dbProps.setProperty("user", "test");
    dbProps.setProperty("password", "test");
    try (Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", dbProps)) {
        try (Statement st = conn.createStatement()) {
            prepareTestData(st);
        }
        // Forward-only, read-only statement with fetch size Integer.MIN_VALUE:
        // per the answer linked below this makes the MySQL driver stream rows
        // instead of loading the whole result set into memory.
        // https://stackoverflow.com/a/2448019/3080094
        try (Statement st = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            st.setFetchSize(Integer.MIN_VALUE);
            fetchAndPrintTestData(st);
        }
    }
}
/** When true the test table is dropped and refilled; when false the existing rows are reused. */
boolean refreshTestData = true;
/** Number of rows inserted when {@link #refreshTestData} is true. */
int maxRecords = 5_555;

/**
 * Makes sure the {@code fetchrecords} table holds test data and logs how many
 * rows are available.
 * <p>
 * With {@link #refreshTestData} set, the table is recreated and
 * {@link #maxRecords} rows are inserted in batches; otherwise the rows already
 * present are counted.
 *
 * @param st statement used to run the DDL, inserts and count query
 * @throws SQLException if any statement fails
 */
void prepareTestData(Statement st) throws SQLException {
    final int insertBatchSize = 500;
    int recordCount = 0;
    if (refreshTestData) {
        st.execute("drop table if exists fetchrecords");
        st.execute("create table fetchrecords (id mediumint not null auto_increment primary key, created timestamp default current_timestamp)");
        // Count from 1 so that a flush happens after every full batch of 500
        // (not after the very first row) and the logged number matches the
        // number of rows actually inserted so far.
        for (int inserted = 1; inserted <= maxRecords; inserted++) {
            st.addBatch("insert into fetchrecords () values ()");
            if (inserted % insertBatchSize == 0) {
                st.executeBatch();
                log.debug("{} records available.", inserted);
            }
        }
        // Flush the remainder that did not fill a complete batch.
        st.executeBatch();
        recordCount = maxRecords;
    } else {
        try (ResultSet rs = st.executeQuery("select count(*) from fetchrecords")) {
            // count(*) yields one row; guard anyway instead of ignoring next().
            recordCount = rs.next() ? rs.getInt(1) : 0;
        }
    }
    log.info("{} records available for testing.", recordCount);
}
/** Number of records collected into one list before it is put on the print-queue. */
int batchSize = 1_000;
/** Capacity of the print-queue, i.e. the maximum number of batches waiting in memory. */
int maxBatchesInMem = 3;
/** Seconds to wait for the print task to finish once reading has ended. */
int printFinishTimeoutS = 5;

/**
 * Producer side of the demo: reads all rows of {@code fetchrecords}, groups
 * them into lists of {@link #batchSize} records and puts those lists on a
 * bounded queue that a {@link PrintToConsole} task (started here on its own
 * thread) consumes. An empty list is queued as the end-of-data marker.
 * <p>
 * A queue timeout or a stopped print task ends reading early and is logged.
 * The method always waits up to {@link #printFinishTimeoutS} seconds for the
 * print task before returning.
 *
 * @param st statement used to run the select
 * @throws SQLException if reading from the database fails
 * @throws InterruptedException if this thread is interrupted while queueing a
 *         batch or while waiting for the print task to finish
 */
void fetchAndPrintTestData(Statement st) throws SQLException, InterruptedException {
    final BlockingQueue<List<FetchRecordBean>> printQueue = new LinkedBlockingQueue<>(maxBatchesInMem);
    final PrintToConsole printTask = new PrintToConsole(printQueue);
    new Thread(printTask).start();
    try (ResultSet rs = st.executeQuery("select * from fetchrecords")) {
        // Batches are only appended to and iterated, so a presized ArrayList fits best.
        List<FetchRecordBean> batch = new ArrayList<>(batchSize);
        while (rs.next()) {
            FetchRecordBean bean = new FetchRecordBean();
            bean.setId(rs.getInt("id"));
            // The column has a default but is not declared "not null"; avoid an NPE on null.
            Timestamp created = rs.getTimestamp("created");
            bean.setCreated(created == null ? null : new java.util.Date(created.getTime()));
            batch.add(bean);
            if (batch.size() >= batchSize) {
                /*
                 * The printTask can stop itself when this producer is too slow to put records on the print-queue.
                 * Therefore, also check printTask.isStopping() to break the while-loop.
                 */
                if (printTask.isStopping()) {
                    throw new TimeoutException("Print task has stopped.");
                }
                enqueue(printQueue, batch);
                batch = new ArrayList<>(batchSize);
            }
        }
        if (!batch.isEmpty()) {
            enqueue(printQueue, batch);
        }
    } catch (TimeoutException e) {
        log.error("Unable to finish printing records to console: {}", e.getMessage());
        printTask.stop();
    } catch (InterruptedException e) {
        log.error("Interrupted before all records were handed to the print task.");
        printTask.stop();
        // Do not swallow the interrupt: pass it on after the cleanup in finally.
        throw e;
    } finally {
        log.info("Reading records finished.");
        if (!printTask.isStopping()) {
            try {
                // An empty list tells the print task that no more data follows.
                enqueue(printQueue, Collections.<FetchRecordBean> emptyList());
            } catch (InterruptedException e) {
                log.error("Unable to signal last record to print.", e);
                printTask.stop();
                // Restore the interrupt status; the await below then fails fast
                // with InterruptedException, which reports it to the caller.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                log.error("Unable to signal last record to print.", e);
                printTask.stop();
            }
        }
        if (!printTask.await(printFinishTimeoutS, TimeUnit.SECONDS)) {
            log.error("Print to console task did not finish.");
        }
    }
}
/** Seconds the producer waits for free space on the print-queue before giving up. */
int enqueueTimeoutS = 5;
// To test a slow printer, see also Thread.sleep statement in PrintToConsole.print.
// int enqueueTimeoutS = 1;

/**
 * Offers one batch to the print-queue, waiting at most {@link #enqueueTimeoutS} seconds.
 *
 * @param printQueue queue shared with the print task
 * @param l batch of records to hand over (an empty list is allowed)
 * @throws InterruptedException if interrupted while waiting for queue space
 * @throws TimeoutException if the queue did not accept the batch in time
 */
void enqueue(BlockingQueue<List<FetchRecordBean>> printQueue, List<FetchRecordBean> l) throws InterruptedException, TimeoutException {
    log.debug("Adding {} records to print-queue.", l.size());
    final boolean accepted = printQueue.offer(l, enqueueTimeoutS, TimeUnit.SECONDS);
    if (accepted) {
        return;
    }
    throw new TimeoutException("Unable to put print data on queue within " + enqueueTimeoutS + " seconds.");
}
/** Seconds the print task waits for the next batch before it gives up. */
int dequeueTimeoutS = 5;

/**
 * Consumer side of the demo: takes batches of records from the queue and
 * "prints" them until it receives an empty list (end of data), is asked to
 * stop, fails, or no batch arrives within {@code dequeueTimeoutS} seconds.
 * <p>
 * However the task ends, {@link #isStopping()} returns true afterwards and
 * threads blocked in {@link #await()} are released.
 */
class PrintToConsole implements Runnable {
    private final BlockingQueue<List<FetchRecordBean>> q;
    // Counted down exactly once, when run() ends.
    private final CountDownLatch finishedLock = new CountDownLatch(1);
    // Written by both the producer (stop()) and this task; volatile for visibility.
    private volatile boolean stop;

    /**
     * @param q queue to take batches from; an empty list marks the end of data
     */
    public PrintToConsole(BlockingQueue<List<FetchRecordBean>> q) {
        this.q = q;
    }

    @Override
    public void run() {
        // True only when the end-of-data marker was received.
        boolean completed = false;
        try {
            while (!stop) {
                List<FetchRecordBean> l = q.poll(dequeueTimeoutS, TimeUnit.SECONDS);
                if (l == null) {
                    log.error("Unable to get print data from queue within {} seconds.", dequeueTimeoutS);
                    break;
                }
                if (l.isEmpty()) {
                    completed = true;
                    break;
                }
                print(l);
            }
            if (stop) {
                log.error("Printing to console was stopped.");
            }
        } catch (InterruptedException e) {
            log.error("Unable to print records to console.", e);
            // Keep the interrupt visible to whoever owns this thread.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            log.error("Unable to print records to console.", e);
        } finally {
            // Always flag the end so a producer checking isStopping() notices it.
            stop = true;
            if (completed) {
                // Only claim success when all data was received, not after a timeout or failure.
                log.info("Printing to console finished.");
            }
            finishedLock.countDown();
        }
    }

    /**
     * Handles one batch; the demo only logs the batch size.
     *
     * @param l non-empty batch taken from the queue
     */
    void print(List<FetchRecordBean> l) {
        log.info("Got list with {} records from print-queue.", l.size());
        // To test a slow printer, see also enqueueTimeoutS.
        // try { Thread.sleep(1500L); } catch (Exception ignored) {}
    }

    /** Asks the task to stop; it is checked before the next batch is taken. */
    public void stop() {
        stop = true;
    }

    /** @return true once a stop was requested or the task has ended */
    public boolean isStopping() {
        return stop;
    }

    /**
     * Blocks until the task has ended.
     *
     * @throws InterruptedException if interrupted while waiting
     */
    public void await() throws InterruptedException {
        finishedLock.await();
    }

    /**
     * Blocks until the task has ended or the timeout elapses.
     *
     * @param timeout maximum time to wait
     * @param tunit unit of {@code timeout}
     * @return true if the task ended in time, false on timeout
     * @throws InterruptedException if interrupted while waiting
     */
    public boolean await(long timeout, TimeUnit tunit) throws InterruptedException {
        return finishedLock.await(timeout, tunit);
    }
}
/**
 * Plain data holder for one record: an integer id and a creation date.
 * <p>
 * {@code java.util.Date} is mutable, so the date is copied on the way in and
 * on the way out; changing a Date passed to or obtained from this bean does
 * not change the bean.
 */
class FetchRecordBean {
    private int id;
    // Fully qualified on purpose: the file imports both java.sql.* and java.util.*.
    private java.util.Date created;

    /** @return the record id */
    public int getId() {
        return id;
    }

    /** @param id the record id */
    public void setId(int id) {
        this.id = id;
    }

    /** @return a copy of the creation date, or null if none was set */
    public java.util.Date getCreated() {
        return created == null ? null : new java.util.Date(created.getTime());
    }

    /** @param created the creation date (copied), may be null */
    public void setCreated(java.util.Date created) {
        this.created = created == null ? null : new java.util.Date(created.getTime());
    }
}
}
依赖关系: