SQL executeBatch缓慢处理

时间:2015-09-27 14:59:16

标签: java mysql jdbc

基本上我必须读取CSV文件并执行一些验证。如果找到重复记录,我将删除先前的记录并插入最新的一条记录。该文件包含大约10万条记录。我不确定我做错了什么,但加载数据的时间太长了。

  /**
   * Loads location records from the given CSV file into the database.
   *
   * <p>Each data row is validated via {@code ValidatorUtil.validateLocation}. Invalid rows are
   * collected as {@code BootstrapMessage} errors. If a row's key (timestamp + mac_address) was
   * seen before (tracked in the {@code locations} map, presumably populated elsewhere — confirm),
   * the earlier database row is deleted and the latest row is inserted instead.
   *
   * <p>Inserts are batched on {@code pstmt} and committed ONCE after the whole file has been
   * processed. (The original code called executeBatch() and commit() inside the loop for every
   * row, which defeated batching entirely and made the 100k-row load extremely slow.)
   *
   * @param file the CSV file; line 1 is a header, data starts at line 2
   * @return the list of validation/duplicate errors if any were found, otherwise {@code null}
   *         (including on I/O or SQL failure, which is logged and swallowed — original contract)
   */
  public static ArrayList<BootstrapMessage> loadLocation(File file) {
    // Flush the insert batch to the DB every this many pending rows.
    final int BATCH_SIZE = 2000;
    ArrayList<BootstrapMessage> errors = new ArrayList<BootstrapMessage>();
    CSVReader reader = null;
    Connection conn = null;
    Connection conn2 = null;

    PreparedStatement pstmt = null;
    PreparedStatement ps = null;
    try {
        conn = ConnectionManager.getConnection();
        // Separate auto-commit connection for deletes, so each DELETE is
        // visible immediately even though the inserts are still uncommitted.
        conn2 = ConnectionManager.getConnection();
        conn.setAutoCommit(false);
        pstmt = conn.prepareStatement(INSERT_LOCATION);
        ps = conn2.prepareStatement("delete from location where `timestamp` = ? AND mac_address = ?");
        reader = new CSVReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        reader.readNext(); // skip header row
        String[] record = reader.readNext();
        int counter = 2; // starting from line 2; line 1 is the header
        int validRecords = 0;   // count of valid, non-duplicate rows (reported below)
        int pendingInBatch = 0; // inserts added since the last executeBatch()
        while (record != null) {
            ArrayList<String> message =
                ValidatorUtil.validateLocation(record, file.getName(), counter);

            if (message != null) { // contains a validation error
                errors.add(new BootstrapMessage(file.getName(), counter, message));
            } else { // valid record
                String key = record[0] + record[1];
                if (locations.containsKey(key)) { // duplicate found
                    // Flush pending inserts first so that if the earlier row is
                    // still sitting in the batch it reaches the DB before the
                    // DELETE below removes it.
                    if (pendingInBatch > 0) {
                        pstmt.executeBatch();
                        pendingInBatch = 0;
                    }
                    message = new ArrayList<String>();
                    message.add("duplicate row");
                    errors.add(new BootstrapMessage(file.getName(), locations.get(key), message));
                    // Delete the earlier record from the database.
                    ps.setTimestamp(1, Timestamp.valueOf(record[0]));
                    ps.setString(2, record[1]);
                    ps.executeUpdate();
                } else {
                    validRecords++;
                }
                // Queue the (latest) record for insertion. Flush only every
                // BATCH_SIZE rows — NOT on every row, and NOT with a per-row
                // commit; that was the cause of the slowness.
                pstmt.setTimestamp(1, Timestamp.valueOf(record[0]));
                pstmt.setString(2, record[1]);
                pstmt.setInt(3, Integer.parseInt(record[2]));
                pstmt.addBatch();
                pendingInBatch++;
                if (pendingInBatch >= BATCH_SIZE) {
                    pstmt.executeBatch();
                    pendingInBatch = 0;
                }
            }

            record = reader.readNext();
            counter++;
        }

        // Single flush + commit for the whole file.
        if (pendingInBatch > 0) {
            pstmt.executeBatch();
        }
        conn.commit();

        System.out.println("valid location records = " + validRecords);
        if (!errors.isEmpty()) {
            return errors;
        }
    } catch (FileNotFoundException ex) {
        Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
    } catch (SQLException ex) {
        Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
        // Undo any uncommitted inserts so a half-loaded file is not persisted.
        if (conn != null) {
            try {
                conn.rollback();
            } catch (SQLException rollbackEx) {
                Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, rollbackEx);
            }
        }
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ex) {
                Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        ConnectionManager.close(conn2, ps);
        ConnectionManager.close(conn, pstmt);
    }
    return null;
  }

1 个答案:

答案 0 :(得分:1)

为什么不使用数据库原生的加载工具(如MySQL的LOAD DATA INFILE)来完成这项工作?或者,先把所有记录插入到一个暂存表(staging table)中,然后用数据库工具(SQL语句或存储过程)来去除重复。这样应该会快得多。