随着时间的推移,SQLite插入会慢慢爬行

时间:2017-01-08 01:24:44

标签: java performance sqlite jdbc database-performance

我使用下面的程序将非常大的.csv文件(约250万行)中的值插入到SQLite DB中。它开始非常快,但随着时间的推移开始变慢,最终无限期地停留在大约900,000行。我的预感说它以某种方式消耗了内存,但不是内存泄漏,因为它从不抛出OutOfMemoryException或类似内容。要清楚,程序永远不会失败或崩溃。它会变慢,直到它停止前进。我的笔记本电脑上的所有其他进程也受到影响,最终甚至需要约10秒才能记录鼠标移动。

我对数据库不是很有经验,所以我可能很容易对我执行INSERT语句的方式感到愚蠢。我最近的修改是使用PreparedStatement.addBatch()PreparedStatement.executeBatch(),尽管阅读了文档,但我仍然不清楚我是否正确使用它们。我使用sqlite-jdbc-3.7.2.jar,如果这有所不同。

public class Database{

        public static void main(String[] args){
            Connection c = connect("db.db");
//            createTable(c);
            addCSVToDatabase(c, "test-10000.csv");
//            print(c);
            disconnect(c);
        }

        public static void createTable(Connection c) {
            Statement stmt;
            String sql = "CREATE TABLE results("
                    + "ID            INTEGER    NOT NULL    PRIMARY KEY AUTOINCREMENT, "
                    + "TITLE         TEXT       NOT NULL, "
                    + "URL           TEXT       NOT NULL    UNIQUE, "
                    + "BEAN  BLOB"
                    + ");"; 
            System.out.println("QUERY: " + sql);
            try {
                stmt = c.createStatement();
                stmt.executeUpdate(sql);
            } catch (SQLException e) { e.printStackTrace();}
        }

        public static void addCSVToDatabase(Connection c, String csvFile){

            BufferedReader reader = null;
            int x = 0;
            DBEntryBean b;
            String[] vals;
            ByteArrayOutputStream baos = null;
            ObjectOutputStream oos = null;
            PreparedStatement pstmt = null;
            String sql = "INSERT OR IGNORE INTO results("
                    + "TITLE, "
                    + "URL, "
                    + "BEAN"
                    + ") VALUES(?, ?, ?);";
            try{
                pstmt = c.prepareStatement(sql);
                reader = new BufferedReader(new InputStreamReader(new FileInputStream(csvFile), "UTF-8"));
                c.setAutoCommit(false);

                for(String line; (line = reader.readLine()) != null;){

                    vals = line.split("\\|"); // Each line is of the form: "title|URL|...|...|..."
                    b = new DBEntryBean();
                    b.setTitle(vals[0]);
                    b.setURL(vals[1]);

                    pstmt.setString(Constants.DB_COL_TITLE, b.getTitle());      
                    pstmt.setString(Constants.DB_COL_URL, b.getURL());  

                    // Store the DBEntryBean in the table so I can retrieve it, rather than construct a new one every time I need it.
                    baos = new ByteArrayOutputStream();
                    oos = new ObjectOutputStream(baos);
                    oos.writeObject(b);
                    pstmt.setBytes(Constants.DB_COL_BEAN, baos.toByteArray());
                    pstmt.addBatch();
                    pstmt.executeBatch(); 
                    System.out.println("Line: " + x++);
                }
            } catch (Exception e){ e.printStackTrace(); 
            } finally{
                try{ 
                    if(pstmt != null){ pstmt.close(); }
                    c.setAutoCommit(true);
                } catch (SQLException e) { e.printStackTrace(); }
            }
        }

        private static Connection connect(String path) {

            String url = "jdbc:sqlite:" + path;
            Connection conn = null;
            try {
                Class.forName("org.sqlite.JDBC");
                conn = DriverManager.getConnection(url);
            } catch (Exception e) { e.printStackTrace(); }
            return conn;
        }

        private static void disconnect(Connection c) {
            try{ if(c != null){ c.close(); }
            } catch(SQLException e){ e.printStackTrace(); }
        }

        private static void print(Connection c){
            Statement stmt = null;
            String sql = "SELECT * FROM results;";
            ResultSet rs = null;
            try {
                stmt = c.createStatement();
                rs = stmt.executeQuery(sql);
                while(rs.next()){
                    System.out.println(rs.getString("TITLE"));
                }
            } catch(Exception e){ e.printStackTrace(); }
        }


    }

1 个答案:

答案 0 :(得分:0)

尝试删除setAutoCommit调用并仅在批量合理大量插入时执行executeBatch。此外,每次插入时都不要打印到控制台。例如:

public static void addCSVToDatabase(Connection c, String csvFile) {

    BufferedReader reader = null;
    int batch = 0;
    int total = 0;
    DBEntryBean b;
    String[] vals;
    ByteArrayOutputStream baos = null;
    ObjectOutputStream oos = null;
    PreparedStatement pstmt = null;
    String sql = "INSERT OR IGNORE INTO results("
        + "TITLE, "
        + "URL, "
        + "BEAN"
        + ") VALUES(?, ?, ?);";

    try {
        pstmt = c.prepareStatement(sql);
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(csvFile), "UTF-8"));

        for(String line; (line = reader.readLine()) != null;) {

            vals = line.split("\\|");
            b = new DBEntryBean();
            b.setTitle(vals[0]);
            b.setURL(vals[1]);

            baos = new ByteArrayOutputStream();
            oos = new ObjectOutputStream(baos);
            oos.writeObject(b);

            pstmt.setString(Constants.DB_COL_TITLE, b.getTitle());
            pstmt.setString(Constants.DB_COL_URL, b.getURL());
            pstmt.setBytes(Constants.DB_COL_BEAN, baos.toByteArray());

            pstmt.addBatch();
            ++batch;
            ++total;

            if (batch == 10000) {
                pstmt.executeBatch(); 
                System.out.println("Total: " + total);
                batch = 0;
            }
        }

        if (batch > 0) {
            pstmt.executeBatch(); 
            System.out.println("Total: " + total);
        }

    } catch (Exception e) { e.printStackTrace();
    } finally {
        try{ 
            if(pstmt != null) { pstmt.close(); }
        } catch (SQLException e) { e.printStackTrace(); }
    }
}

如果表现仍然很糟糕,我会建议您及时更改一件事,看看是否可以隔离问题。例如,删除UNIQUE列上的URL索引,以查看始终插入的性能。或者删除插入BLOB等