这是一个更好的并行编程方式吗?

时间:2017-07-20 03:51:25

标签: go parallel-processing channel

我写了这个脚本,用来从 Instagram 获取一些“网红”(influencer)的粉丝数量。

我测得的运行时间(runtime)在 550-750ms 之间。这不算太糟,但我想知道它还能不能更快(我是 Go 新手,才学了 3 周)。

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "sync"
    "time"
)

// user is the top-level object of the decoded JSON response; only the
// "user" key is mapped.
type user struct {
    User userData `json:"user"`
}

// userData holds the part of the "user" object this program reads: the
// "followed_by" sub-object.
type userData struct {
    Followers count `json:"followed_by"`
}

// count wraps the integer stored under the "count" key.
type count struct {
    Count int `json:"count"`
}

// getFollowerCount starts one worker goroutine that reads usernames from in,
// looks up the follower count of each, and sends the counts on the returned
// channel. The returned channel is closed once in has been closed and drained.
//
// A username whose lookup fails (request, body read, or JSON decode error) is
// reported on stdout and skipped: no value is sent for it, and the worker
// keeps processing the remaining usernames.
func getFollowerCount(in <-chan string) <-chan int {
    out := make(chan int)
    go func() {
        defer close(out)
        for un := range in {
            n, err := fetchFollowerCount(un)
            if err != nil {
                fmt.Println(err)
                continue
            }
            out <- n
        }
    }()
    return out
}

// fetchFollowerCount performs a single request for username and returns the
// decoded follower count. It lives in its own function so that the deferred
// Body.Close runs after every request instead of piling up until the worker
// loop ends, and so that a failed request never touches a nil response.
func fetchFollowerCount(username string) (int, error) {
    resp, err := http.Get("https://www.instagram.com/" + username + "/?__a=1")
    if err != nil {
        return 0, err
    }
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return 0, err
    }

    var u user
    if err := json.Unmarshal(body, &u); err != nil {
        return 0, err
    }
    return u.User.Followers.Count, nil
}

// merge fans in any number of int channels into a single channel. One
// forwarding goroutine is started per input; the merged channel is closed
// after every input has been closed and fully drained. The order in which
// values from different inputs appear is not defined.
func merge(cs ...<-chan int) <-chan int {
    merged := make(chan int)

    var pending sync.WaitGroup
    pending.Add(len(cs))
    for _, c := range cs {
        go func(src <-chan int) {
            defer pending.Done()
            for v := range src {
                merged <- v
            }
        }(c)
    }

    // Close only after all forwarders are done, so no send hits a closed channel.
    go func() {
        pending.Wait()
        close(merged)
    }()
    return merged
}

// gen returns an unbuffered channel that yields the given usernames in
// argument order and is closed after the last one has been received.
func gen(users ...string) <-chan string {
    names := make(chan string)
    go func() {
        defer close(names)
        for i := 0; i < len(users); i++ {
            names <- users[i]
        }
    }()
    return names
}

// main feeds a fixed list of usernames into a shared channel, starts ten
// workers that all read from it, prints every follower count as it arrives
// on the merged output, and finally logs the total elapsed time.
func main() {
    start := time.Now()
    fmt.Println("STARTING UP")
    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
    in := gen(usrs...)

    // Ten workers share the same input channel; each username is picked up
    // by exactly one of them.
    const workerCount = 10
    workers := make([]<-chan int, 0, workerCount)
    for i := 0; i < workerCount; i++ {
        workers = append(workers, getFollowerCount(in))
    }

    for d := range merge(workers...) {
        fmt.Println(d)
    }

    log.Println("runtime", time.Since(start))
}

2 个答案:

答案 0 :(得分:2)

我同意jeevatkm,有很多方法可以实现你的任务并改进它。一些说明:

  1. 分离实际执行作业的功能(即从远程服务获取结果)和负责协调所有作业的功能。
  2. 最好将 error 传播给调用者,而不是在被调用的函数内部直接处理(消化)掉它。
  3. 由于作业是并行(parallel)完成的,结果返回的顺序是不确定的。因此,除了粉丝数之外,结果中还应包含其他相关信息(例如用户名)。
  4. 以下实施可能是一种选择:

    package main
    
    import (
        "encoding/json"
        "errors"
        "fmt"
        "net/http"
        "sync"
        "time"
    )
    
    // user is the top-level object of the decoded JSON response; only the
    // "user" key is mapped.
    type user struct {
        User userData `json:"user"`
    }
    
    // userData holds the part of the "user" object this program reads: the
    // "followed_by" sub-object.
    type userData struct {
        Followers count `json:"followed_by"`
    }
    
    // count wraps the integer stored under the "count" key.
    type count struct {
        Count int `json:"count"`
    }
    
    // follower is the result of one lookup. It carries the username alongside
    // the count so results can be told apart when they arrive in arbitrary
    // order (see note 3 above). Error is non-nil when the lookup failed.
    type follower struct {
        Username string
        Count    int
        Error    error
    }
    
    // GetFollowerCountFunc is a function that fetches the follower count of a
    // single user, identified by the string argument. Having it as a type lets
    // the coordinator accept either the real fetcher or a mock.
    type GetFollowerCountFunc func(string) (int, error)
    
    // mockGetFollowerCountFor is a network-free stand-in for testing: usernames
    // of nine or more bytes succeed with a count of 10, shorter ones fail with
    // -1 and an error.
    func mockGetFollowerCountFor(userName string) (int, error) {
        if len(userName) >= 9 {
            return 10, nil
        }
        return -1, errors.New("mocking error in get follower count")
    }
    
    // getFollowerCountFor fetches the follower count of userName from the
    // remote service (see note 1 above).
    //
    // It returns -1 and a non-nil error when the request fails, when the
    // server answers with a status other than 200 OK, or when the response
    // body cannot be decoded.
    func getFollowerCountFor(userName string) (int, error) {
        URL := "https://www.instagram.com/" + userName + "/?__a=1"
        resp, err := http.Get(URL)
        if err != nil {
            return -1, err
        }
        defer resp.Body.Close()

        // Without this check an error page would surface as an obscure JSON
        // decode error, or worse, as a silent count of 0.
        if resp.StatusCode != http.StatusOK {
            return -1, fmt.Errorf("get follower count for %q: unexpected status %s", userName, resp.Status)
        }

        var u user
        if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
            return -1, err
        }
        return u.User.Followers.Count, nil
    }
    
    // getFollowersAsync coordinates the lookups (see notes 1 and 2 above). It
    // returns immediately with a channel on which one follower value per
    // entry of users is delivered, in no particular order; the channel is
    // closed after the last result has been sent.
    //
    // One goroutine is started per user, so the number of concurrent calls to
    // fn equals len(users). The channel's buffer holds len(users) results,
    // which means a worker never blocks on its send even if the caller reads
    // slowly; the buffer size does not limit how many workers run.
    //
    // A failed lookup is reported in-band: Count is forced to -1 and Error
    // carries the error returned by fn.
    func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
        results := make(chan follower, len(users))

        // Distribute the jobs from a separate goroutine so the caller is not
        // blocked while the workers run.
        go func() {
            // Closing lets the caller's `for ... range` terminate.
            defer close(results)

            var pending sync.WaitGroup
            for _, name := range users {
                pending.Add(1)
                go func(uid string) {
                    defer pending.Done()

                    res := follower{Username: uid}
                    res.Count, res.Error = fn(uid)
                    if res.Error != nil {
                        res.Count = -1
                    }
                    results <- res
                }(name)
            }
            pending.Wait()
        }()

        return results
    }
    
    // main looks up the follower counts of a fixed list of usernames
    // concurrently, prints one line per result (either the count or the
    // error), and finally prints the total elapsed time.
    func main() {
        start := time.Now()
        fmt.Println("STARTING UP")
        usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

        results := getFollowersAsync(usrs, getFollowerCountFor)
        //For TESTING:
        //results := getFollowersAsync(usrs, mockGetFollowerCountFor)
        for r := range results {
            if r.Error != nil {
                // The trailing newline keeps each error on its own line;
                // without it the next result is glued onto the message.
                fmt.Printf("Error for user '%s' => %v\n", r.Username, r.Error)
            } else {
                fmt.Printf("%s: %d\n", r.Username, r.Count)
            }
        }

        elapsed := time.Since(start)
        fmt.Println("runtime", elapsed)
    }
    

答案 1 :(得分:1)

欢迎来到Go,快乐学习。

你做得很好,不过你的程序还有很多可以改进的地方(例如使用 json 解码器、减少 channel 的数量等)。以下是其中一种做法。执行时间在 352-446ms 之间(由于代码涉及网络调用,服务器响应时间可能会有波动,所以这个数字仅供参考)。

您的更新代码:

package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "sync"
    "time"
)

// user is the top-level object of the decoded JSON response; only the
// "user" key is mapped.
type user struct {
    User userData `json:"user"`
}

// userData holds the part of the "user" object this program reads: the
// "followed_by" sub-object.
type userData struct {
    Followers count `json:"followed_by"`
}

// count wraps the integer stored under the "count" key.
type count struct {
    Count int `json:"count"`
}

// getFollowerCount fetches the follower count of username and sends it on
// result. It always calls wg.Done before returning.
//
// On any failure (request error, a status other than 200 OK, or a body that
// cannot be decoded) the problem is logged and nothing is sent on result.
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
    defer wg.Done()
    reqURL := "https://www.instagram.com/" + username + "/?__a=1"
    resp, err := http.Get(reqURL)
    if err != nil {
        log.Println(err)
        return
    }
    defer resp.Body.Close()

    // Without this check an error page would surface as an obscure JSON
    // decode error, or worse, as a silent count of 0.
    if resp.StatusCode != http.StatusOK {
        log.Printf("get follower count for %q: unexpected status %s", username, resp.Status)
        return
    }

    var u user
    if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
        log.Println(err)
        return
    }
    result <- u.User.Followers.Count
}

// execute starts one getFollowerCount goroutine per entry of users, waits
// for all of them to finish, and then sends the sentinel value -1 on result
// to tell the receiver that no more counts will follow. It does not close
// result.
func execute(users []string, result chan<- int) {
    var pending sync.WaitGroup
    for i := range users {
        pending.Add(1)
        go getFollowerCount(users[i], result, &pending)
    }
    pending.Wait()

    // End-of-stream marker expected by the receiver.
    result <- -1
}

// main runs the lookups for a fixed list of usernames in the background,
// prints each follower count as it arrives, stops at the -1 end-of-stream
// marker, and finally prints the total elapsed time.
func main() {
    begin := time.Now()
    fmt.Println("STARTING UP")

    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

    counts := make(chan int)
    go execute(usrs, counts)

    for n := range counts {
        // -1 is the sentinel execute sends once every worker has finished.
        if n == -1 {
            break
        }
        fmt.Println(n)
    }

    fmt.Println("runtime:", time.Since(begin))
}