首先,我想说我已经查看了Golang download multiple files in parallel using goroutines和Example for sync.WaitGroup correct?,并且我已将它们用作我的代码中的指南。但是我不确定它对我有用。我正在尝试从aws上的多个桶下载文件。这就是我所拥有的(出于安全原因,某些行将为空白)。
package main
import (
"fmt"
"os"
"os/user"
"path/filepath"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
)
var (
//Bucket = "" // Download from this bucket
Prefix = "" // Using this key prefix
LocalDirectory = "s3logs" // Into this directory
)
// create a single session to be used
var sess = session.New()
// used to control concurrency
var wg sync.WaitGroup
func main() {
start := time.Now()
//map of buckets to region
regBuckets := map[string]string{
}
// download the files for each bucket
for region, bucket := range regBuckets {
fmt.Println(region)
wg.Add(1)
go getLogs(region, bucket, LocalDirectory, &wg)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("\nTime took %s\n", elapsed)
}
// function to get data from buckets
func getLogs(region string, bucket string, directory string, wg *sync.WaitGroup) {
client := s3.New(sess, &aws.Config{Region: aws.String(region)})
params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: &Prefix}
manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
d.PartSize = 6 * 1024 * 1024 // 6MB per part
d.Concurrency = 5
})
d := downloader{bucket: bucket, dir: directory, Downloader: manager}
client.ListObjectsPages(params, d.eachPage)
wg.Done()
}
// downloader object and methods
type downloader struct {
*s3manager.Downloader
bucket, dir string
}
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
for _, obj := range page.Contents {
d.downloadToFile(*obj.Key)
}
return true
}
func (d *downloader) downloadToFile(key string) {
// Create the directories in the path
// desktop path
user, errs := user.Current()
if errs != nil {
panic(errs)
}
homedir := user.HomeDir
desktop := homedir + "/Desktop/" + d.dir
file := filepath.Join(desktop, key)
if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
panic(err)
}
// Setup the local file
fd, err := os.Create(file)
if err != nil {
panic(err)
}
defer fd.Close()
// Download the file using the AWS SDK
fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
d.Download(fd, params)
_, e := d.Download(fd, params)
if e != nil {
panic(e)
}
}
在regBuckets
散列图中,我放置了bucket names : regions
的列表在下面的for循环中,我打印了存储桶名称。所以,如果我有两个桶,我想同时从两个桶中下载这些项目。我正在用打印声明对此进行测试。我除了看到第一个桶的名称,并且在第二个桶的名称后不久。然而,似乎不是从多个桶下载文件并行它正在按顺序下载它们,例如桶1完成桶1时for循环继续然后桶2 ...等所以我需要帮助确保我正在下载并行因为我有大约10个桶,速度很重要。我也想知道是不是因为我正在使用一个会话。有什么想法吗?