基本上我必须阅读csv文件并执行一些验证。 如果找到重复记录,我将删除先前的记录并插入最新的记录1。 该文件包含大约100k条记录。我不确定我做错了什么,但加载数据的时间太长了。
public static ArrayList<BootstrapMessage> loadLocation(File file) {
ArrayList<BootstrapMessage> errors = new ArrayList<BootstrapMessage>();
CSVReader reader = null;
Connection conn = null;
Connection conn2 = null;
PreparedStatement pstmt = null;
PreparedStatement ps = null;
try {
conn = ConnectionManager.getConnection();
conn2 = ConnectionManager.getConnection();
conn.setAutoCommit(false);
pstmt = conn.prepareStatement(INSERT_LOCATION);
ps = conn2.prepareStatement("delete from location where `timestamp` = ? AND mac_address = ?");
reader = new CSVReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
reader.readNext();//header
String[] record = reader.readNext();
int counter = 2;//starting from line 2. Line 1 is header
int validRecords = 0;
while (record != null) {
ArrayList<String> message = null;
//try {
message = ValidatorUtil.validateLocation(record, file.getName(), counter);
if (message != null) {//contains error
errors.add(new BootstrapMessage(file.getName(), counter, message));
} else {//valid record
String key = record[0] + record[1];
if (locations.containsKey(key)) {//duplicate found.
pstmt.executeBatch();
message = new ArrayList<String>();
message.add("duplicate row");
errors.add(new BootstrapMessage(file.getName(), locations.get(key), message));
//delete record from database
ps.setTimestamp(1, Timestamp.valueOf(record[0]));
ps.setString(2, record[1]);
ps.executeUpdate();
//inserting the latest record
pstmt.setTimestamp(1, Timestamp.valueOf(record[0]));
pstmt.setString(2, record[1]);
pstmt.setInt(3, Integer.parseInt(record[2]));
pstmt.addBatch();
if (validRecords % 2000 == 0) {
pstmt.executeBatch();
}
} else {
pstmt.setTimestamp(1, Timestamp.valueOf(record[0]));
pstmt.setString(2, record[1]);
pstmt.setInt(3, Integer.parseInt(record[2]));
pstmt.addBatch();
validRecords++;
if (validRecords % 2000 == 0) {
pstmt.executeBatch();
}
}
}
if (validRecords > 0) {
pstmt.executeBatch();
conn.commit();
}
record = reader.readNext();
counter++;
}
System.out.println("valid location records = " + validRecords);
//numOfValidRecords.put(fileName, validRecords);
if (!errors.isEmpty()) {
return errors;
}
} catch (FileNotFoundException ex) {
Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
} catch (SQLException ex) {
Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException ex) {
Logger.getLogger(LocationDAO.class.getName()).log(Level.SEVERE, null, ex);
}
}
ConnectionManager.close(conn2, ps);
ConnectionManager.close(conn, pstmt);
}
return null;
}
答案 0 :(得分:1)
为什么不使用本机数据库加载器来完成这项工作? 或者我首先将所有记录插入到分段中,然后使用数据库工具(SQL或某些数据库过程)执行重复删除。这样它必须更快。