如何使用Golang的database/sql包批处理sql语句?
在Java中我会这样做:
// Create a prepared statement with a single positional (?) parameter
String sql = "INSERT INTO my_table VALUES(?)";
PreparedStatement pstmt = connection.prepareStatement(sql);
// Queue 10 rows: bind the loop counter (as a string) to parameter 1,
// then add the bound statement to the batch
for (int i=0; i<10; i++) {
pstmt.setString(1, ""+i);
pstmt.addBatch();
}
// Execute the queued batch; updateCounts holds the int[] returned by executeBatch()
int [] updateCounts = pstmt.executeBatch();
我如何在Golang中实现同样的目标?
答案 0 :(得分:47)
由于db.Exec
函数是variadic,因此一个选项(实际上只进行单个网络往返)就是自己构造语句并将参数分解并传入。
示例代码:
// BulkInsert writes every row in unsavedRows to my_sample_table with one
// multi-row INSERT statement, issued through a single db.Exec call.
//
// Each row contributes one "(?, ?, ?)" group to the VALUES clause and its
// Column1, Column2 and Column3 fields, in that order, to the argument list.
// An empty (or nil) slice is a no-op and returns nil, because an INSERT with
// an empty VALUES list is not valid SQL.
//
// It returns the error reported by db.Exec, if any.
func BulkInsert(unsavedRows []*ExampleRowStruct) error {
	if len(unsavedRows) == 0 {
		return nil
	}

	valueStrings := make([]string, 0, len(unsavedRows))
	valueArgs := make([]interface{}, 0, len(unsavedRows)*3)
	for _, post := range unsavedRows {
		valueStrings = append(valueStrings, "(?, ?, ?)")
		valueArgs = append(valueArgs, post.Column1, post.Column2, post.Column3)
	}

	stmt := fmt.Sprintf("INSERT INTO my_sample_table (column1, column2, column3) VALUES %s", strings.Join(valueStrings, ","))
	_, err := db.Exec(stmt, valueArgs...)
	return err
}
在我运行的一个简单测试中,这个解决方案在插入10,000行时比在另一个答案中显示的Begin,Prepare,Commit快大约4倍 - 尽管实际的改进将在很大程度上取决于您的个人设置,网络延迟,等
答案 1 :(得分:12)
如果你正在使用PostgreSQL,那么pq支持bulk imports。
答案 2 :(得分:5)
扩展Avi Flax的答案,我在INSERT中需要一个ON CONFLICT DO UPDATE子句。
解决方案是先用COPY将数据导入临时表(设置为在事务结束时自动删除),然后再从临时表INSERT到永久表。
以下是我确定的代码:
// saveToDBBulk stores items inside a single transaction: it creates a
// temporary table, streams every item into it with COPY (pq.CopyIn), and then
// runs sqlFDataSetFromTemp to move the rows into the permanent table.
//
// Every failure is returned wrapped with the name of the step that failed.
// Unless the commit succeeds, the deferred function rolls the transaction back.
func (fdata *FDataStore) saveToDBBulk(items map[fdataKey][]byte) (err error) {
	tx, err := fdata.db.Begin()
	if err != nil {
		return errors.Wrap(err, "begin transaction")
	}
	txOK := false
	defer func() {
		if !txOK {
			// Best-effort cleanup; the error that caused the early return
			// is the one reported to the caller.
			_ = tx.Rollback()
		}
	}()

	// The ON COMMIT DROP clause at the end makes sure that the table
	// is cleaned up at the end of the transaction.
	// While the "for{..} state machine" goroutine in charge of delayed
	// saving ensures this function is not running twice at any given time.
	_, err = tx.Exec(sqlFDataMakeTempTable)
	// CREATE TEMPORARY TABLE fstore_data_load
	// (map text NOT NULL, key text NOT NULL, data json)
	// ON COMMIT DROP
	if err != nil {
		return errors.Wrap(err, "create temporary table")
	}

	stmt, err := tx.Prepare(pq.CopyIn(_sqlFDataTempTableName, "map", "key", "data"))
	if err != nil {
		// Without this check a failed prepare would only surface later,
		// when stmt is first used.
		return errors.Wrap(err, "prepare COPY stmt")
	}
	for key, val := range items {
		_, err = stmt.Exec(string(key.Map), string(key.Key), string(val))
		if err != nil {
			return errors.Wrap(err, "loading COPY data")
		}
	}

	// An Exec with no arguments flushes the buffered COPY data.
	_, err = stmt.Exec()
	if err != nil {
		return errors.Wrap(err, "flush COPY data")
	}
	err = stmt.Close()
	if err != nil {
		return errors.Wrap(err, "close COPY stmt")
	}

	_, err = tx.Exec(sqlFDataSetFromTemp)
	// INSERT INTO fstore_data (map, key, data)
	// SELECT map, key, data FROM fstore_data_load
	// ON CONFLICT DO UPDATE SET data = EXCLUDED.data
	if err != nil {
		return errors.Wrap(err, "move from temporary to real table")
	}

	err = tx.Commit()
	if err != nil {
		return errors.Wrap(err, "commit transaction")
	}
	txOK = true
	return nil
}
答案 3 :(得分:3)
针对PostgreSQL调整Andrew的解决方案(PostgreSQL不支持 ? 占位符,需要使用 $n),以下代码可以正常工作:
// BulkInsert writes every row in unsavedRows to my_sample_table with one
// multi-row INSERT statement that uses numbered "$n" placeholders.
//
// Row i (zero-based) is rendered as "($3i+1, $3i+2, $3i+3)" and contributes
// its Column1, Column2 and Column3 fields, in that order, to the argument
// list. An empty (or nil) slice is a no-op and returns nil, because an INSERT
// with an empty VALUES list is not valid SQL.
//
// It returns the error reported by db.Exec, if any.
func BulkInsert(unsavedRows []*ExampleRowStruct) error {
	if len(unsavedRows) == 0 {
		return nil
	}

	valueStrings := make([]string, 0, len(unsavedRows))
	valueArgs := make([]interface{}, 0, len(unsavedRows)*3)
	for i, post := range unsavedRows {
		// base is the number of placeholders used by the preceding rows.
		base := i * 3
		valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d)", base+1, base+2, base+3))
		valueArgs = append(valueArgs, post.Column1, post.Column2, post.Column3)
	}

	stmt := fmt.Sprintf("INSERT INTO my_sample_table (column1, column2, column3) VALUES %s", strings.Join(valueStrings, ","))
	_, err := db.Exec(stmt, valueArgs...)
	return err
}
答案 4 :(得分:2)
如果您使用的是Postgres,这里是@Debasish Mitra的解决方案。
功能示例:https://play.golang.org/p/dFFD2MrEy3J
替代示例:https://play.golang.org/p/vUtW0K4jVMd
<div class="navbar">
<div>
<a href="cpage1.html">First Peoples </a>
<a href="cpage2.html">Natives And Newcomers</a>
<a href="cpage3.html">Provincial Centre</a>
</div>
<div>
<a href="cpage4.html">Industrializing City</a>
<a href="cpage5.html">Wars And Crises</a>
<a href="cpage6.html">The Modern Metropolis</a>
</div>
</div>
func ReplaceSQL
-ms-
答案 5 :(得分:1)
无法通过database/sql提供的接口直接进行批处理。但是,具体的数据库驱动程序可能单独支持它。例如,https://github.com/ziutek/mymysql 似乎支持对MySQL进行批处理。
答案 6 :(得分:1)
对于Postgres,lib pq支持批量插入:https://godoc.org/github.com/lib/pq#hdr-Bulk_imports
不过,用下面的代码也能达到同样的目的;它真正有用的场合是需要执行批量条件更新时(相应地修改查询即可)。
要对Postgres执行类似的批量插入,可以使用以下功能。
// ReplaceSQL returns a copy of old in which the non-overlapping occurrences of
// searchPattern are replaced, left to right, by "$1", "$2", "$3", and so on.
//
// The input is scanned exactly once, so text that has already been emitted
// (including the "$n" markers themselves) is never matched again, and the
// cost is linear in len(old). If searchPattern is empty, or does not occur,
// old is returned unchanged.
func ReplaceSQL(old, searchPattern string) string {
	if searchPattern == "" {
		// An empty pattern matches everywhere; there is nothing sensible to number.
		return old
	}

	var b strings.Builder
	b.Grow(len(old))
	rest := old
	for n := 1; ; n++ {
		idx := strings.Index(rest, searchPattern)
		if idx < 0 {
			break
		}
		b.WriteString(rest[:idx])
		b.WriteByte('$')
		b.WriteString(strconv.Itoa(n))
		// Continue after the match so replacements never overlap.
		rest = rest[idx+len(searchPattern):]
	}
	b.WriteString(rest)
	return b.String()
}
因此上面的示例变为
// Start the statement; one "(?, ?, ?)" group per row is appended below.
sqlStr := "INSERT INTO test(n1, n2, n3) VALUES "
// vals collects the arguments in the same order as the placeholders.
vals := []interface{}{}
for _, row := range data {
sqlStr += "(?, ?, ?)," // one "?" per column
vals = append(vals, row["v1"], row["v2"], row["v3"]) // one row["v{n}"] value per column
}
// Trim the trailing comma left by the last group.
sqlStr = strings.TrimSuffix(sqlStr, ",")
// Replace each "?" with "$n" for Postgres.
sqlStr = ReplaceSQL(sqlStr, "?")
// Prepare the statement.
// NOTE(review): the error returned by db.Prepare is discarded here.
stmt, _ := db.Prepare(sqlStr)
// Execute with all collected values at once.
// NOTE(review): the error returned by stmt.Exec is discarded, and stmt is
// not closed within this snippet.
res, _ := stmt.Exec(vals...)
答案 7 :(得分:1)
我可以使用pq.CopyIn,它实际上比字符串值/参数方法快2.4倍(这非常有用,而且是一个很好的解决方案,顺便说一句,谢谢!)
我将1000万个int,varchar的测试值插入到一个结构中,并使用以下函数加载了该值。我对GoLang有点陌生,所以请忍受...
// copyData loads dataModels into the "_temp" table (columns "a" and "b")
// using a COPY statement built with pq.CopyIn, inside one transaction.
//
// The transaction is committed only after every row has been queued, the
// COPY has been flushed and the statement has been closed without error.
// On any failure the transaction is rolled back and the first error is
// returned; a failed Commit is reported to the caller as well.
func copyData(client *client.DbClient, dataModels []*dataModel) error {
	// Begin is called through the handle held by client instead of on a
	// dereferenced copy of it, so no database handle is copied by value.
	txn, err := client.DB.Begin()
	if err != nil {
		return err
	}
	committed := false
	defer func() {
		if !committed {
			// Best-effort cleanup; the original error is what gets returned.
			_ = txn.Rollback()
		}
	}()

	stmt, err := txn.Prepare(pq.CopyIn("_temp", "a", "b"))
	if err != nil {
		return err
	}
	for _, model := range dataModels {
		if _, err := stmt.Exec(model.a, model.b); err != nil {
			return err
		}
	}
	// An Exec with no arguments flushes the buffered COPY data.
	if _, err := stmt.Exec(); err != nil {
		return err
	}
	if err := stmt.Close(); err != nil {
		return err
	}

	if err := txn.Commit(); err != nil {
		return err
	}
	committed = true
	return nil
}
`
已用(字符串值/参数):1m30.60s。
经过(复制):37.57秒。
答案 8 :(得分:0)
采用Andrew C的想法,并使用sql标量变量使其适应我的工作。它完全适合我的工作中的特定要求。也许对某人有用,因为在golang中模拟sql的批处理很有用。就是这个主意。
// BulkInsert writes every row in unsavedRows to my_sample_table with one
// multi-row INSERT statement that uses named "@pN" parameters.
//
// Row i (zero-based) is rendered as "(@p3i+1, @p3i+2, @p3i+3)"; its Column1,
// Column2 and Column3 fields are passed as sql.NamedArg values named
// "p3i+1", "p3i+2" and "p3i+3" respectively. An empty (or nil) slice is a
// no-op and returns nil, because an INSERT with an empty VALUES list is not
// valid SQL.
//
// It returns the error reported by db.Exec, if any.
func BulkInsert(unsavedRows []*ExampleRowStruct) error {
	if len(unsavedRows) == 0 {
		return nil
	}

	valueStrings := make([]string, 0, len(unsavedRows))
	params := make([]interface{}, 0, len(unsavedRows)*3)
	for i, post := range unsavedRows {
		// base is the number of parameters used by the preceding rows.
		base := i * 3
		valueStrings = append(valueStrings, fmt.Sprintf("(@p%d, @p%d, @p%d)", base+1, base+2, base+3))
		params = append(params,
			sql.Named(fmt.Sprintf("p%d", base+1), post.Column1),
			sql.Named(fmt.Sprintf("p%d", base+2), post.Column2),
			sql.Named(fmt.Sprintf("p%d", base+3), post.Column3),
		)
	}

	sqlQuery := fmt.Sprintf("INSERT INTO my_sample_table (column1, column2, column3) VALUES %s", strings.Join(valueStrings, ","))
	_, err := db.Exec(sqlQuery, params...)
	return err
}
答案 9 :(得分:0)
另一个值得一看、支持链式语法的更好的库是go-pg
https://github.com/go-pg/pg/wiki/Writing-Queries#insert
通过单个查询插入多本书:
// Pass both books to one Model(...).Insert() call; err holds what it returns.
err := db.Model(book1, book2).Insert()
答案 10 :(得分:0)
这是一个更通用的版本,基于@andrew-c和@mastercarl的答案,用于生成查询语句和值参数:
// bulk / insert.go
import (
"strconv"
"strings"
)
// ValueExtractor returns the column values for the row at the given index.
type ValueExtractor = func(int) []interface{}

// Generate builds a multi-row INSERT statement for tableName together with
// the flat argument list that goes with it.
//
// Column names are emitted wrapped in double quotes; tableName is emitted as
// given. For each of the numRows rows one parenthesised placeholder group is
// written: sequential "$1", "$2", ... when postgres is true, "?" otherwise.
// The values returned by valueExtractor(row) are appended to the argument
// list in row order.
func Generate(tableName string, columns []string, numRows int, postgres bool, valueExtractor ValueExtractor) (string, []interface{}) {
	width := len(columns)

	var sb strings.Builder
	sb.WriteString("INSERT INTO " + tableName + "(")
	for pos, name := range columns {
		if pos > 0 {
			sb.WriteString(",")
		}
		sb.WriteString("\"" + name + "\"")
	}
	sb.WriteString(") VALUES ")

	args := make([]interface{}, 0, numRows*width)
	// next is the 1-based ordinal of the placeholder about to be written.
	next := 1
	for row := 0; row < numRows; row++ {
		if row > 0 {
			sb.WriteString(",")
		}
		sb.WriteString("(")
		for col := 0; col < width; col++ {
			if col > 0 {
				sb.WriteString(",")
			}
			if postgres {
				sb.WriteString("$" + strconv.Itoa(next))
			} else {
				sb.WriteString("?")
			}
			next++
		}
		sb.WriteString(")")
		args = append(args, valueExtractor(row)...)
	}
	return sb.String(), args
}
// bulk / insert_test.go
import (
"fmt"
"strconv"
)
// valueExtractor produces the three sample column values for row index:
// "trx-<index>", "name-<index>" and index itself.
func valueExtractor(index int) []interface{} {
	suffix := strconv.Itoa(index)
	row := make([]interface{}, 0, 3)
	row = append(row, "trx-"+suffix, "name-"+suffix, index)
	return row
}
// ExampleGeneratePostgres prints the query and arguments Generate produces
// for three rows with postgres set to true.
//
// Generate appends each row's values to one flat slice, so the second output
// line is a single list rather than one bracketed group per row.
func ExampleGeneratePostgres() {
	query, valueArgs := Generate("tbl_persons", []string{"transaction_id", "name", "age"}, 3, true, valueExtractor)
	fmt.Println(query)
	fmt.Println(valueArgs)
	// Output:
	// INSERT INTO tbl_persons("transaction_id","name","age") VALUES ($1,$2,$3),($4,$5,$6),($7,$8,$9)
	// [trx-0 name-0 0 trx-1 name-1 1 trx-2 name-2 2]
}
// ExampleGenerateOthers prints the query and arguments Generate produces for
// three rows with postgres set to false, i.e. with "?" placeholders.
//
// Generate appends each row's values to one flat slice, so the second output
// line is a single list rather than one bracketed group per row.
func ExampleGenerateOthers() {
	query, valueArgs := Generate("tbl_persons", []string{"transaction_id", "name", "age"}, 3, false, valueExtractor)
	fmt.Println(query)
	fmt.Println(valueArgs)
	// Output:
	// INSERT INTO tbl_persons("transaction_id","name","age") VALUES (?,?,?),(?,?,?),(?,?,?)
	// [trx-0 name-0 0 trx-1 name-1 1 trx-2 name-2 2]
}
答案 11 :(得分:0)
万一有人使用pgx(Golang中最好的Postgres驱动程序),请参见以下解决方案: https://github.com/jackc/pgx/issues/764#issuecomment-685249471