Faster transfer of 2 billion records between two database tables

时间:2018-04-18 18:06:27

标签: java mysql jdbc connection prepared-statement

The script below fetches each row from table (q_iTable) having 2 billion records in database db1, gets two column values (iIndex and pIndex) and modifies them into json data structure. This modified data is then inserted into iTable in db2. Also, the values of columns from the two tables are checked and the update or insert of iTable in db2 is done accordingly.

This transfer of data from q_iTable to iTable is currently being done at a rate of 700 records/minute which will basically takes years to transfer the entire 2 billion record. I would like to know how I can make this process quick such that the entire 2 billion record can be transferred faster. Any help/guidance would be highly appreciated!

public class Transfer {
    public static void main(String[] args) throws SQLException {
        Logger log = LoggerFactory.getLogger(Transfer.class);
        Connection con = null;
        PreparedStatement stat = null;
        ResultSet results = null;
        Connection con2 =null;
        Statement stmt2 = null;
        try {
            con = DriverManager.getConnection("jdbc:mysql:ip_address/port", "username", "password");
            con.setAutoCommit(false);
            stat = con.prepareStatement(
                    "SELECT * FROM db1.q_iTable;",
                    ResultSet.TYPE_FORWARD_ONLY,
                    ResultSet.CONCUR_READ_ONLY);
            stat.setFetchSize(Integer.MIN_VALUE);
            results = stat.executeQuery();

            con2 = DriverManager.getConnection("jdbc:mysql:ip_address/port", "username", "password");
            stmt2 = con2.createStatement();
            while (results.next()) {
                String iSerial = results.getString("iSerial");
                String iIndex = results.getString("iIndex");
                String pIndex = results.getString("pIndex");

                JSONObject jsonSubObject = null;
                JSONArray jsonArrayRET = new JSONArray();
                jsonSubObject = new JSONObject();
                jsonSubObject.put("iIndex", iIndex);
                jsonSubObject.put("pIndex", pIndex);

                jsonArrayRET.add(jsonSubObject);

                String properties = jsonArrayRET.toString();

                String prevSQL = "SELECT * FROM db2.iTable ORDER BY id DESC LIMIT 1;";
                ResultSet rs = stmt2.executeQuery( prevSQL );
                if(rs.next()) {
                    String previSerial = rs.getString("iSerial");
                    String previIndex = rs.getString("iIndex");
                    String prevpIndex = rs.getString("pIndex");

                    if((previSerial.equals(iSerial)) && (previIndex.equals(iIndex))) {
                        String query = "update db2.iTable set iIndex=\""+iIndex+"\" ORDER BY id DESC LIMIT 1;";
                        stmt2.executeUpdate(query);
                    } else {
                        String query = "insert into db2.iTable values(" + NULL + ", \'" + iSerial + "\', \'" + properties + "\');";
                        stmt2.executeUpdate(query);
                    }
                } else  {
                    String query = "insert into db2.iTable values(" + NULL + ", \'" + iSerial + "\', \'" + properties + "\');";
                    stmt2.executeUpdate(query);
                }
            }
        } catch (SQLException e) {
                throw new IllegalStateException("cannot connect", e);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (ProcessingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            results.close();
            stat.close();
            con.close();
            con2.close();
            stmt2.close();
        }
    }
}

EDIT with incorporating addBatch() and executeBatch():

I am using addBatch() and executeBatch() but I am not observing much of a difference with the data transfer rate. Any suggestions?

public class Transfer {
    public static void main(String[] args) throws SQLException {
        Logger log = LoggerFactory.getLogger(Transfer.class);
        Connection con = null;
        PreparedStatement stat = null;
        ResultSet results = null;
        Connection con2 =null;
        Statement stmt2 = null;
        try {
            con = DriverManager.getConnection("jdbc:mysql:ip_address/port", "username", "password");
            con.setAutoCommit(false);
            stat = con.prepareStatement(
                    "SELECT * FROM db1.q_iTable;",
                    ResultSet.TYPE_FORWARD_ONLY,
                    ResultSet.CONCUR_READ_ONLY);
            stat.setFetchSize(Integer.MIN_VALUE);
            results = stat.executeQuery();

            con2 = DriverManager.getConnection("jdbc:mysql:ip_address/port", "username", "password");
            stmt2 = con2.createStatement();
            while (results.next()) {
                int count = 0;
                String iSerial = results.getString("iSerial");
                String iIndex = results.getString("iIndex");
                String pIndex = results.getString("pIndex");

                JSONObject jsonSubObject = null;
                JSONArray jsonArrayRET = new JSONArray();
                jsonSubObject = new JSONObject();
                jsonSubObject.put("iIndex", iIndex);
                jsonSubObject.put("pIndex", pIndex);

                jsonArrayRET.add(jsonSubObject);

                String properties = jsonArrayRET.toString();

                String prevSQL = "SELECT * FROM db2.iTable ORDER BY id DESC LIMIT 1;";
                ResultSet rs = stmt2.executeQuery( prevSQL );
                if(rs.next()) {
                    String previSerial = rs.getString("iSerial");
                    String previIndex = rs.getString("iIndex");
                    String prevpIndex = rs.getString("pIndex");

                    if((previSerial.equals(iSerial)) && (previIndex.equals(iIndex))) {
                        String query = "update db2.iTable set iIndex=\""+iIndex+"\" ORDER BY id DESC LIMIT 1;";
                        stmt2.addBatch(query);
                    } else {
                        String query = "insert into db2.iTable values(" + NULL + ", \'" + iSerial + "\', \'" + properties + "\');";
                        stmt2.addBatch(query);
                    }
                } else  {
                    String query = "insert into db2.iTable values(" + NULL + ", \'" + iSerial + "\', \'" + properties + "\');";
                    stmt2.addBatch(query);
                }
                if (count % 1000 == 0 ) {
                    pgStmt.executeBatch();
                    pgCon.commit();
                }
            }
        } catch (SQLException e) {
                throw new IllegalStateException("cannot connect", e);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (ProcessingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            results.close();
            stat.close();
            con.close();
            con2.close();
            stmt2.close();
        }
    }
}

0 个答案:

没有答案