Why is calling executeBatch() from a PreparedStatement so slow?

Time: 2015-07-23 15:04:02

Tags: java oracle apache-spark

I am using Spark to insert records into an Oracle DB.

When the batch size reaches 100K, the job calls executeBatch(). However, this call takes far too long. I have attached a thread dump; can someone explain what might be going wrong here? Is there anything that needs to be configured on the Oracle side?


    Thread 66: Executor task launch worker-4 (RUNNABLE)
    java.net.SocketOutputStream.socketWrite0(Native Method)
    java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:113)
    java.net.SocketOutputStream.write(SocketOutputStream.java:159)
    oracle.net.ns.DataPacket.send(DataPacket.java:150)
    oracle.net.ns.NetOutputStream.write(NetOutputStream.java:145)
    oracle.jdbc.driver.T4CMAREngine.marshalCLR(T4CMAREngine.java:659)
    oracle.jdbc.driver.T4CMAREngine.marshalCLR(T4CMAREngine.java:634)
    oracle.jdbc.driver.T4CTTIrxd.marshal(T4CTTIrxd.java:619)
    oracle.jdbc.driver.T4C8Oall.marshalBinds(T4C8Oall.java:1794)
    oracle.jdbc.driver.T4C8Oall.marshalAll(T4C8Oall.java:1251)
    oracle.jdbc.driver.T4C8Oall.marshal(T4C8Oall.java:542)
    oracle.jdbc.driver.T4CPreparedStatement.doOall8(T4CPreparedStatement.java:180)
    oracle.jdbc.driver.T4CPreparedStatement.executeForRows(T4CPreparedStatement.java:953)
    oracle.jdbc.driver.OraclePreparedStatement.executeForRowsWithTimeout(OraclePreparedStatement.java:9215)
    oracle.jdbc.driver.OraclePreparedStatement.executeBatch(OraclePreparedStatement.java:9315)
    oracle.jdbc.driver.OracleStatementWrapper.executeBatch(OracleStatementWrapper.java:211)
    com.data.pro.DBStore$InsertFunction.call(DBStore.java:250)
    com.data.pro.DBStore$InsertFunction.call(DBStore.java:148)
    org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:195)
    org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:195)
    org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:773)
    org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:773)
    org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1319)
    org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1319)
    org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
    org.apache.spark.scheduler.Task.run(Task.scala:56)
    org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
    java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    java.lang.Thread.run(Thread.java:745)
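
The dump shows the executor inside SocketOutputStream.socketWrite0 while T4CMAREngine marshals bind values, i.e. the driver is streaming all 100K buffered rows to the server within the single executeBatch() call. For context, here is a minimal sketch of the usual JDBC batch tuning: auto-commit disabled, and a much smaller batch flushed and committed per round trip. The JDBC URL, credentials, and column types are assumptions for illustration, not taken from the job, and a plain INSERT stands in for the job's MERGE:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

public class BatchInsertSketch {
    public static void main(String[] args) throws SQLException {
        // Hypothetical connection details; replace with the real ones.
        String url = "jdbc:oracle:thin:@dbhost:1521:ORCL";
        try (Connection conn = DriverManager.getConnection(url, "user", "pass")) {
            // One small transaction per flush instead of one huge one at the end.
            conn.setAutoCommit(false);

            String sql = "INSERT INTO server_det"
                    + " (Domain_Name, site_type, data_transfered)"
                    + " VALUES (?, ?, ?)";
            try (PreparedStatement ps = conn.prepareStatement(sql)) {
                int batchSize = 500; // hundreds per round trip, not 100K
                int count = 0;
                for (int i = 0; i < 100000; i++) { // stand-in for the real row source
                    ps.setString(1, "domain" + i + ".example.com");
                    ps.setString(2, "web");      // site_type assumed textual
                    ps.setDouble(3, 123.45);     // data_transfered assumed numeric
                    ps.addBatch();
                    if (++count % batchSize == 0) {
                        ps.executeBatch(); // one network round trip per 500 rows
                        conn.commit();     // keeps undo per transaction small
                    }
                }
                ps.executeBatch(); // flush any remainder
                conn.commit();
            }
        }
    }
}

Commonly cited guidance for the Oracle thin driver puts the useful batch size in the tens to low hundreds; beyond that, each executeBatch() mostly spends longer writing bind data to the socket, which is exactly what the stack above is doing. The job's insert code: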

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Iterator;

import org.apache.spark.api.java.function.VoidFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import scala.Tuple2;

public class DBStore {
    private static final Logger logger = LoggerFactory.getLogger(DBStore.class);

    public static class InsertFunction implements
            VoidFunction<Iterator<Tuple2<String, String>>> {

        private static final long serialVersionUID = 999955766876878L;
        private String dbuser = "";
        private String dbpass = "";
        private int batchsize;

        public InsertFunction(String dbuser, String dbpass, int batchsize) {
            super();
            this.dbuser = dbuser;
            this.dbpass = dbpass;
            this.batchsize = batchsize;
        }

        @Override
        public void call(Iterator<Tuple2<String, String>> results) {
            Connection connect = null;
            PreparedStatement pstmt = null;
            try {
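                // getDBConnection(...) is defined elsewhere in DBStore. Note
                // that connect.commit() below requires auto-commit to be
                // disabled on the returned connection (JDBC spec).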
                connect = getDBConnection(dbuser, dbpass);

                int count = 0;

                if (batchsize <= 0) {
                    batchsize = 10000;
                }

                // Upsert via MERGE: update data_transfered when a row with the
                // same (Domain_Name, site_type) exists, insert otherwise.
                pstmt = connect
                        .prepareStatement("MERGE INTO server_det a USING ("
                                + "Select ? Domain_Name, ? site_type, ? data_transfered FROM dual) B On (A.Domain_Name = B.Domain_Name and A.site_type = B.site_type) "
                                + "When Matched Then "
                                + "Update Set A.data_transfered = B.data_transfered "
                                + "When Not Matched Then "
                                + "Insert ( A.Domain_Name, A.data_transfered, A.site_type ) "
                                + "Values ( B.Domain_Name, B.data_transfered, B.site_type )");

                while (results.hasNext()) {

                    Tuple2<String, String> kv = results.next();

                    // The key and value are comma-joined, then split back
                    // into the three bind fields.
                    String[] data = kv._1.concat("," + kv._2).split(",");

                    pstmt.setString(1, data[0]);
                    pstmt.setDouble(2, Double.parseDouble(data[1]));
                    pstmt.setString(3, data[2]);

                    pstmt.addBatch();

                    count++;

                    if (count == batchsize) {
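                        // executeBatch() sends every buffered row to Oracle in
                        // one round trip; per the JDBC spec the batch is
                        // cleared afterwards, so clearBatch() is not needed.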
                        pstmt.executeBatch();
                        count = 0;
                    }

                }

                // Flush the final partial batch, then commit the whole
                // partition in one transaction.
                pstmt.executeBatch();
                connect.commit();

            } catch (Exception e) {
                logger.error("InsertFunction error: " + e.getMessage());
            } finally {

                try {

                    if (pstmt != null) {
                        pstmt.close();
                    }
                    if (connect != null) {
                        connect.close();
                    }
                } catch (SQLException e) {
                    logger.error("InsertFunction Connection Close error: "
                            + e.getMessage());
                }
            }
        }
    }
}
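
For completeness, the function is presumably attached with foreachPartition. A hedged sketch of the call site follows; the upstream RDD (pairs / buildPairs()), credentials, and partition count are all hypothetical. Raising the partition count while lowering batchsize spreads the same rows over more, smaller round trips:

// Hypothetical wiring; buildPairs() stands in for the job's upstream stages.
JavaRDD<Tuple2<String, String>> pairs = buildPairs();
pairs.repartition(64) // more, smaller partitions
     .foreachPartition(new DBStore.InsertFunction(dbuser, dbpass, 1000));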

0 Answers:

No answers