Question

我需要从AWS S3通过HTTP获取一些大对象（最多50GB）并计算每个对象的哈希值。我还需要获取内容类型，如果是某种类型，请获取一些元数据。

我想要并行完成上述所有操作。

我正在考虑使用 io.MultiWriter 和 io.Pipe（改编自这篇文章中的解决方案 #5）。

func handleUpload(u io.Reader) {
    // create the pipes
    contentTypeR, contentTypeW := io.Pipe()
    metaR, metaW := io.Pipe()
    hashR, hashW := io.Pipe()

    // create channel to synchronize
    done := make(chan bool)
    defer close(done)

    contentTypeCh := make(chan string)
    defer close(contentTypeCh)

    go getContentType(contentTypeR, contentTypeCh, done)
    go processMetadata(metaR, contentTypeCh, done)
    go calculateHash(hashR, done)

    go func() {
        defer contentTypeW.Close
        defer metaW.Close()
        defer hashW.Close()

        mw := io.MultiWriter(contentTypeW, metaW, hashW)

        io.Copy(mw, u)
    }()

    // wait until all are done
    for c := 0; c < 3; c++ {
        <-done
    }
}

// getContentType sniffs the MIME type of the stream r from its first 512
// bytes and sends the result on contentTypeCh.
//
// Only the first 512 bytes are read here. The send on contentTypeCh blocks
// until a receiver is ready unless the channel is buffered.
func getContentType(r io.Reader, contentTypeCh chan<- string, done <-chan bool) {

    lr := io.LimitReader(r, 512) // only read the first 512 bytes
    // Read through the limited reader; reading r directly would buffer the
    // whole stream in memory before detection could start.
    first512, err := ioutil.ReadAll(lr)

    if err != nil {
        //do something with the error
    }

    contentType := http.DetectContentType(first512)

    contentTypeCh <- contentType

    // NOTE(review): if r is the read side of an io.Pipe, the rest of the
    // stream still has to be drained (or the reader closed) after this point,
    // otherwise the writer blocks. Confirm the elided code below does so.
    // ... (remainder elided in the original snippet)
}

// processMetadata waits for the detected content type on contentTypeCh and
// only continues with metadata extraction from r when the type is an image.
//
// For any other type the rest of r is read and discarded before returning,
// so that a producer writing into the other end of r is not left blocked.
func processMetadata(r io.Reader, contentTypeCh <-chan string, done <-chan bool) {

    contentType := <-contentTypeCh

    // Sniffed types are full MIME types such as "image/png", so match on the
    // "image/" prefix; the bare value "image" is still accepted as before.
    const imagePrefix = "image/"
    isImage := contentType == "image" ||
        (len(contentType) >= len(imagePrefix) && contentType[:len(imagePrefix)] == imagePrefix)

    if !isImage {
        // don't get the metadata if the type is not an image, but keep
        // consuming the stream. The error is ignored on purpose: there is
        // nothing left to process and this function has no error return.
        _, _ = io.Copy(ioutil.Discard, r)
        return
    }

    // ... (remainder elided in the original snippet)
}

我主要关心的是在 goroutine 中执行的内容类型检测（getContentType）和元数据处理（processMetadata）这两个函数。对于内容类型检测，我们只读取前512个字节。对于元数据处理，如果内容类型不是图像，我们不会执行任何操作。

如果多路写入器继续使用io.Copy()将数据写入管道并且管道读取器未被读取，是否会导致内存或资源泄漏？

如果我在使用io.MultiWriter时没有从io.Reader读取，是否会出现内存或资源泄漏？

0 个答案: