我正在尝试学习Go语言,我想向postgresql导入器编写一个小的文本文件。我做过类似于Java的操作,最后我将比较性能差异。
我正在努力解决的问题是如何关闭lineChannel
。据我了解,问题出在这个for循环中:
for {
currentGoroutinesChannel <- 1
path, morePaths := <-filePathChannel
if !morePaths {
log.Println("No more files to process")
close(lineChannel)
break
}
fmt.Println("processing file: ", path)
go readFile(path, compiledRegex, lineChannel, currentGoroutinesChannel)
fmt.Println("Number of Goroutines: ", runtime.NumGoroutine())
}
在lineChannel
中没有剩余的东西之前,我不等待任何goroutine在关闭filePathChannel
之前完成。
完整代码:
package main
import (
"database/sql"
"flag"
"fmt"
"github.com/lib/pq"
_ "github.com/lib/pq"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"runtime"
"strings"
)
func main() {
// Flags
dataSource := flag.String("input", "/Users/joshuahemmings/Documents/Dev/Personal/GoTxtToPostgres/testDocuments", "Data to Import [STRING]")
delimiters := flag.String("delimiters", ";:|", "delimiters list [STRING]")
concurrency := flag.Int("concurrency", 2, "Concurrency (amount of GoRoutines) [INT]")
copySize := flag.Int("copySize", 5000, "How many rows get imported per execution [INT]")
flag.Parse()
compiledRegex := regexp.MustCompile("^(.?)[" + *delimiters + "](.)$")
lineChannel := make(chan string, 1000)
filePathChannel := make(chan string, 1000)
currentGoroutinesChannel := make(chan int, *concurrency)
stopToolChannel := make(chan bool, 1)
connStr := "user=todo dbname=golang password=123 sslmode=disable"
db, err := sql.Open("postgres", connStr)
if err != nil {
log.Fatal(err)
}
_ = filepath.Walk(*dataSource,
func(path string, file os.FileInfo, err error) error {
if err != nil {
log.Fatalf("Error reading %s: %v", path, err)
return nil
}
if file.IsDir() {
return nil
}
if filepath.Ext(file.Name()) == ".txt" {
log.Printf("reading %s, %vB", path, file.Size())
filePathChannel <- path
}
return nil
})
close(filePathChannel)
go textToPostgres(&lineChannel, *copySize, *db, &stopToolChannel)
for {
currentGoroutinesChannel <- 1
path, morePaths := <-filePathChannel
if !morePaths {
log.Println("No more files to process")
close(lineChannel)
break
}
fmt.Println("processing file: ", path)
go readFile(path, compiledRegex, lineChannel, currentGoroutinesChannel)
fmt.Println("Number of Goroutines: ", runtime.NumGoroutine())
}
fmt.Println("Number of Goroutines: ", runtime.NumGoroutine())
<-stopToolChannel
fmt.Println("Stopping tool")
}
func readFile(path string, delimiters *regexp.Regexp, lineChannel chan string, currentGoroutinesChannel chan int) {
fileData, err := ioutil.ReadFile(path)
if err != nil {
log.Fatalf("Cannot read file %s", path)
return
}
fileAsString := string(fileData)
fileData = nil
lines := strings.Split(fileAsString, "\n")
fileAsString = ""
for _, line := range lines {
line = strings.TrimSpace(line)
if line != "" {
lineChannel <- delimiters.ReplaceAllString(line, "${1}:$2")
}
}
log.Printf("Done reading %s", path)
<-currentGoroutinesChannel
}
func textToPostgres(lineChannel *chan string, copySize int, db sql.DB, stopToolChannel *chan bool) {
const query = `
CREATE TABLE IF NOT EXISTS table (
col1 varchar(300),
col2 varchar(300)
)`
_, err := db.Exec(query)
if err != nil {
log.Fatal("Failed to create table if exists")
}
lineCount := 0
txn, err := db.Begin()
if err != nil {
log.Fatal(err)
}
stmt, err := txn.Prepare(pq.CopyIn("table", "col1", "col2"))
if err != nil {
log.Fatal(err)
}
for {
line, more := <-*lineChannel
fmt.Println("Processing LINE: " + line)
copySize++;
splitLine := strings.SplitN(line, ":", 2)
if len(splitLine) == 2 {
if lineCount%copySize == 0 {
fmt.Println("Commiting: " + splitLine[0] + " Password: " + splitLine[1])
_, err = stmt.Exec(splitLine[0], splitLine[1])
if err != nil {
fmt.Println("Error on split Line")
log.Fatal(err)
}
}
}
if !more {
fmt.Println("NO MORE LINES")
_, err = stmt.Exec()
if err != nil {
log.Fatal(err)
}
err = stmt.Close()
if err != nil {
log.Fatal(err)
}
err = txn.Commit()
if err != nil {
log.Fatal(err)
}
break
}
}
*stopToolChannel <- true
}
您将如何解决我的小问题?
谢谢您的建议!