golang http文件太多了

时间:2016-03-02 16:13:51

标签: http go

在我的项目中,我需要发送5000次/秒的http请求。我确实阅读了一些关于这个问题的博客和问题,我尝试了一些事情:

  1. 将流程ulimit更改为10 ^ 5
  2. 关闭content.Body
  3. 使用一个http客户端并重复使用连接
  4. 但他们只是不工作。实际上,我的应用程序可以运行几分钟或几小时,然后在我的设置中达到ulimit值(首先是50000,晚期为100000,使用shell cmd ls -l /proc/PID/fd | wc -l)。 http请求如下:

    package http
    
    import "downloader/request"
    import "downloader/response"
    import "downloader/proxy"
    import "downloader/ssdb"
    import "net/http"
    import "net/url"
    import "time"
    import "fmt"
    import "strings"
    import "io/ioutil"
    import "errors"
    
    func getProxy(ssdbClients []*ssdb.SSDBClient, proxyName string, reqUrl string) (string, error){
        var err error
        p := ""
        url := ""
        if proxyName != "" {
            p, err = proxy.GetOne(ssdbClients,proxyName)
            if err != nil {
                return url, err
            }
            if strings.HasPrefix(reqUrl, "https") {
                url = fmt.Sprintf("https://%s", p)
            } else if strings.HasPrefix(reqUrl, "http") {
                url = fmt.Sprintf("http://%s", p)
            } else {
                return url,errors.New(fmt.Sprintf("reqUrl %s not valid", reqUrl))
            }
            return url, err
        }
        return url, err
    }
    
    func customProxy(req *http.Request) (*url.URL, error) {
        httpProxy := req.Header.Get("HttpProxy")
        if httpProxy == "" {
            return nil, nil
        } else {
            return url.Parse(httpProxy)
        }
    }
    
    func GetClient() *http.Client {
        var client http.Client
        client = http.Client{
            Timeout: 15 * time.Second,
        }
        transport :=  http.Transport{
            Proxy: customProxy,
            DisableCompression: false,
            MaxIdleConnsPerHost: 10000,
        }
        client.Transport = &transport
        return &client
    }
    
    func structResponse(req *request.HttpRequest, content *http.Response, proxy string) (response.HttpResponse, error) {
        var resp response.HttpResponse
        data, err := ioutil.ReadAll(content.Body)
        if err != nil {
            return resp, err
        }
        header := make(map[string]interface{})
        for k, v := range content.Header {
            if len(v) > 0 {
                header[k] = v[0]
            }
        }
        var encoding string
        encoding = content.Header.Get("Content-Encoding")
        resp = response.HttpResponse {
            Request: *req,
            ErrorCode: 0,
            ErrorMsg: "",
            StatusCode: content.StatusCode,
            Reason: content.Status,
            Html: string(data),
            Headers: header,
            Encoding: encoding,
            Url: req.Url,
            CrawlerName: req.CrawlerName,
            ProxyName: req.ProxyName,
            HttpProxy: proxy,
        }
        return resp, err
    }
    
    func Send(ssdbClients []*ssdb.SSDBClient, r *request.HttpRequest, httpClient *http.Client) (response.HttpResponse, error) {
        var resp response.HttpResponse
        var err error
        var httpProxy string
    
        httpProxy, err = getProxy(ssdbClients, r.ProxyName, r.Url)
        if err != nil {
            return resp, err
        }
    
        req, err := http.NewRequest(r.Method, r.Url, strings.NewReader(r.Data))
        if err != nil {
            return resp, err
        }
    
        u, err := url.Parse(r.Url)
        if err != nil {
            return resp, err
        }
    
        defaultHeaders := map[string] string {
            "User-Agent": `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) 
            AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36`,
            "Upgrade-Insecure-Requests": "1",
            "Connection": "keep-alive",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6",
            "Cache-Control": "max-age=0",
            "Host": u.Host,
            "HttpProxy": httpProxy,
        }
    
        for k, v := range defaultHeaders {
            req.Header.Set(k, v)
        }
    
        for k, v := range r.Headers {
            req.Header.Set(k, fmt.Sprintf("%v", v))
        }
    
        for k, v := range r.Cookies {
            req.Header.Set("Cookie", fmt.Sprintf("%s=%v", k, v))
        }
    
    
        values := req.URL.Query()
        for k, v := range r.Params {
            values.Add(k, fmt.Sprintf("%v", v))
        }
        req.URL.RawQuery = values.Encode()
    
        content, err := httpClient.Do(req)
        if err != nil {
            return resp, err
        }
        defer content.Body.Close()
        resp, err = structResponse(r, content, httpProxy)
        if err != nil {
            return resp, err
        }
        respCookies := make(map[string] interface{})
        if httpClient.Jar != nil {
            cookies := httpClient.Jar.Cookies(req.URL)
            for _, cookie := range cookies {
                respCookies[cookie.Name] = cookie.Value
            }
            resp.Cookies = respCookies
        }
        return resp, err
    }
    

    我使用我的http库如下:

    func sendRequest(clients []*ssdb.SSDBClient, r *request.HttpRequest, httpClient *http.Client) {
        for {
            resp, err := http.Send(clients, *r, httpClient)
            if err != nil {
                utils.Error.Println("http send fail ", err.Error())
            } else {
                if resp.StatusCode == 200 {
                    utils.Info.Println("status 200, success")
                } else {
                    utils.Warning.Println("status ", resp.StatusCode, resp.Reason)
                }
            }
        }
    }
    func main() {
        httpClient := http.GetClient()
        var wg sync.WaitGroup
        wg.Add(1)
        for i:=0;i<crawlerConsumers;i++ {
            go sendRequest(clients, r, httpClient)
        }
        wg.Wait()
    }
    

    任何人都可以有任何想法或建议吗?

    • 添加代理会对http客户端连接产生影响吗?
    • 我的应用必须达到ulimit值,因为我的应用发送了请求 每秒5000次,一次请求超时15秒?
    • 我是否关闭所有文件句柄?
    • 我忘了处理一些例外吗?

    使用pprof

    添加一些个人资料信息
    /debug/pprof/
    
    profiles:
    0   block
    99490   goroutine(This number is growing constantly, very strange)
    1640    heap
    14  threadcreate
    
    # runtime.MemStats
    # Alloc = 210967776
    # TotalAlloc = 3673382376
    # Sys = 646805032
    # Lookups = 29226
    # Mallocs = 15590422
    # Frees = 14236863
    # HeapAlloc = 210967776
    # HeapSys = 331415552
    # HeapIdle = 72081408
    # HeapInuse = 259334144
    # HeapReleased = 0
    # HeapObjects = 1353559
    # Stack = 283049984 / 283049984
    # MSpan = 4007136 / 4112384
    # MCache = 9664 / 16384
    # BuckHashSys = 1575958
    # NextGC = 306292818
    # PauseNs = [2096153 2554867 2962532 4711468 3373778 3548443 1764893 1961992 2434108 2658626 2627832 2564733 2610212 3278169 2954882 2847604 4443650 3582780 3557718 4201288 4762243 8349689 3812924 5098353 5196422 4494087 5209715 5978150 5060982 4825367 6529020 5738726 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    # NumGC = 32
    # EnableGC = true
    # DebugGC = false
    
    
    # runtime.MemStats
    # Alloc = 334390488
    # TotalAlloc = 7408124464
    # Sys = 963748912
    # Lookups = 64289
    # Mallocs = 31759565
    # Frees = 29798637
    # HeapAlloc = 334390488
    # HeapSys = 536903680
    # HeapIdle = 171114496
    # HeapInuse = 365789184
    # HeapReleased = 0
    # HeapObjects = 1960928
    # Stack = 381648896 / 381648896
    # MSpan = 4958240 / 5160960
    # MCache = 9664 / 16384
    # BuckHashSys = 1625694
    # NextGC = 337299393
    # PauseNs = [2096153 2554867 2962532 4711468 3373778 3548443 1764893 1961992 2434108 2658626 2627832 2564733 2610212 3278169 2954882 2847604 4443650 3582780 3557718 4201288 4762243 8349689 3812924 5098353 5196422 4494087 5209715 5978150 5060982 4825367 6529020 5738726 5563959 6997601 6906020 6778559 7592484 7193865 7198439 7815078 7217336 6923856 7127406 7491410 7029097 7757883 6861948 7295746 7245947 9037505 7656431 7322897 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    # NumGC = 52
    # EnableGC = true
    # DebugGC = false
    
    # runtime.MemStats
    # Alloc = 468642296
    # TotalAlloc = 25479911160
    # Sys = 1185490448
    # Lookups = 278984
    # Mallocs = 119997794
    # Frees = 117189156
    # HeapAlloc = 468642296
    # HeapSys = 680919040
    # HeapIdle = 174424064
    # HeapInuse = 506494976
    # HeapReleased = 0
    # HeapObjects = 2808638
    # Stack = 448397312 / 448397312
    # MSpan = 6943104 / 7061504
    # MCache = 9664 / 16384
    # BuckHashSys = 1757422
    # NextGC = 485127068
    # PauseNs = [2096153 2554867 2962532 4711468 3373778 3548443 1764893 1961992 2434108 2658626 2627832 2564733 2610212 3278169 2954882 2847604 4443650 3582780 3557718 4201288 4762243 8349689 3812924 5098353 5196422 4494087 5209715 5978150 5060982 4825367 6529020 5738726 5563959 6997601 6906020 6778559 7592484 7193865 7198439 7815078 7217336 6923856 7127406 7491410 7029097 7757883 6861948 7295746 7245947 9037505 7656431 7322897 7155298 7288230 8027051 7442561 7528532 8418744 7847097 8791562 7242055 8418159 7677703 8662475 8408315 8562446 8110168 7308570 8593523 7820707 8734530 8981041 8721316 8152324 8390552 8603397 9059668 8153933 7986519 7744819 8491656 8562181 7816543 9145512 7902742 7780778 7636659 13573779 12684501 94808535 9443609 8257986 9072718 9634563 9229626 9449536 8644605 8898286 8499036 13101964 8743251 9119720 9267487 8178551 8444107 8362330 8447271 8558115 8788773 9977627 9058283 8743149 8508649 9099904 9382220 9684119 9789404 9730475 8506223 10363233 9979499 9033233 9634088 9739395 9129433 9122154 8615491 9632523 9314836 9845890 9888849 10034358 10207887 10073912 9892683 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    # NumGC = 135
    # EnableGC = true
    # DebugGC = false
    

    添加cpu个人资料.svg文件attachment

    最后,我弄清楚我应该向http客户端添加超时,如下所示:

    transport :=  http.Transport{
        Proxy: customProxy,
        DisableCompression: false,
        MaxIdleConnsPerHost: 10000,
        Dial: (&net.Dialer{
                    Timeout:   15 * time.Second,
                    KeepAlive: 15 * time.Second,
                }).Dial,
        TLSHandshakeTimeout: 5 * time.Second,
    }
    

1 个答案:

答案 0 :(得分:0)

你看过这个了吗: http://craigwickesser.com/2015/01/golang-http-to-many-open-files/

基本上他建议使用req.Header.Set("Connection", "close")

我想知道你的连接是否一直保持打开状态,直到命中超时/保持活动为止,所以手动将其设置为低于默认设置是否足够快地关闭连接而不会溢出?