按国家/地区抓取亚马逊价格

时间:2021-02-01 00:42:36

标签: go web-scraping amazon

目前我的抓取工具是按邮政编码来抓取的,我该如何将其改为按国家/地区抓取?

这是代码:https://pastebin.com/4Q6m8RPR

我需要在 cookie 中保存“国家”,而目前只保存了“邮政编码”。代码中有两个函数:先访问网站,然后“捕获” cookie。

// ChangeZipCode resets the package-level cookieContainer and then issues the
// sequence of Amazon requests that sets the delivery location to zipCode:
//
//  1. GET the marketplace home page and extract the suffix of the
//     "/ah/ajax/counter?ctr=desktop_ajax_atf" URL embedded in it.
//  2. POST to that counter URL.
//  3. GET the address-selections page and extract the CSRF token that follows
//     the marketplace-specific label text.
//  4. POST the address change with the form data and the token.
//  5. POST to the get-location-label endpoint.
//
// purl selects the marketplace (amazon.es, .de, .fr, .co.uk, .it); any other
// URL falls back to amazon.com. proxies is handed unchanged to every request
// helper.
//
// The function returns nothing, so a failing step is logged and the remaining
// steps are skipped.
func ChangeZipCode(purl string, proxies []dal.Proxy, zipCode string) {
	cookieContainer = make(map[string]string)

	// The parsed URL is only logged; marketplace detection below works on the
	// raw string, so a parse failure is not fatal here.
	u, err := url.Parse(purl)
	if err != nil {
		logger.Println(err)
	} else {
		logger.Println(u)
	}

	// zipCode is escaped because the body is sent as
	// application/x-www-form-urlencoded (e.g. UK postcodes contain a space).
	formData := "locationType=LOCATION_INPUT&zipCode=" + url.QueryEscape(zipCode) +
		"&storeContext=generic&deviceType=web&pageType=Gateway&actionSource=glow&almBrandId=undefined"

	// searchTxt is the localized label that precedes the CSRF token on the
	// address-selections page of each marketplace.
	var domainFix, searchTxt string
	switch {
	case strings.Contains(purl, "www.amazon.es"):
		domainFix = "es"
		searchTxt = "\"Tu dirección de envío actual es:\""
	case strings.Contains(purl, "www.amazon.de"):
		domainFix = "de"
		searchTxt = "\"Sie kaufen gerade ein für:\""
	case strings.Contains(purl, "www.amazon.fr"):
		domainFix = "fr"
		searchTxt = "\"Votre lieu de livraison est désormais:\""
	case strings.Contains(purl, "www.amazon.co.uk"):
		domainFix = "co.uk"
		searchTxt = "\"You're now shopping for delivery to:\""
	case strings.Contains(purl, "www.amazon.it"):
		domainFix = "it"
		searchTxt = "\"L'indirizzo di consegna selezionato è:\""
	default:
		domainFix = "com"
		searchTxt = "\"You're now shopping for delivery to:\""
	}
	baseURL := "https://www.amazon." + domainFix

	homePage, err := getRequest(baseURL, proxies)
	if err != nil {
		logger.Println(err)
		return
	}

	uidCookie := GetStringInBetweenTwoString(homePage, "/ah/ajax/counter?ctr=desktop_ajax_atf", "\" })</script>")
	postRequest(baseURL+"/ah/ajax/counter?ctr=desktop_ajax_atf"+uidCookie, proxies, "")

	tokenPage, err := getRequest(baseURL+"/gp/glow/get-address-selections.html?deviceType=desktop&pageType=Gateway&storeContext=NoStoreName", proxies)
	if err != nil {
		logger.Println(err)
		return
	}

	crosToken := GetStringInBetweenTwoString(tokenPage, searchTxt+", CSRF_TOKEN : \"", "\", IDs:{\"ADDRESS_LIST\":\"GLUXAddressList\"")
	if _, err, _ := changeZipCodePostRequest(baseURL+"/gp/delivery/ajax/address-change.html", proxies, formData, crosToken); err != nil {
		logger.Println(err)
		return
	}

	postRequest(baseURL+"/gp/glow/get-location-label.html", proxies, "storeContext=hpc&pageType=Landing")
}
// changeZipCodePostRequest POSTs formData to surl as
// application/x-www-form-urlencoded, sending token in the
// "anti-csrftoken-a2z" header, and returns the response body.
//
// Cookies previously stored in the package-level cookieContainer for surl's
// host are attached to the request, and cookies set by the response are
// written back to cookieContainer under the key "<cookie name><host>".
// When proxies is non-empty the request goes through one chosen by
// getRandomProxy.
//
// It returns the (gunzipped, if needed) body, an error, and a flag that is
// true exactly when the error is non-nil.
func changeZipCodePostRequest(surl string, proxies []dal.Proxy, formData string, token string) (string, error, bool) {
	logger.Println("processing", surl)

	// The host is needed both to select stored cookies and to key new ones,
	// so an unparsable URL cannot be processed at all.
	u, err := url.Parse(surl)
	if err != nil {
		return "", err, true
	}

	var client fasthttp.Client
	if len(proxies) > 0 {
		px := getRandomProxy(proxies)
		client.Dial = proxy.FastHTTPProxyDialer(px)
		logger.Println("with proxy", px)
	}
	defer client.CloseIdleConnections()

	req := fasthttp.AcquireRequest()
	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(req)
	defer fasthttp.ReleaseResponse(resp)

	// Rebuild the cookie jar from the entries stored for this host.
	cj = cookiejar.AcquireCookieJar()
	for key, value := range cookieContainer {
		if !strings.Contains(key, u.Host) {
			continue
		}
		// Stored values are raw cookie strings ("name=value; attr=..."):
		// keep what lies between the first "=" and the first ";". Entries
		// without "=" are skipped instead of indexing out of range.
		parts := strings.SplitN(value, "=", 2)
		if len(parts) < 2 {
			continue
		}
		name := strings.Replace(key, u.Host, "", -1)
		cj.Set(name, strings.SplitN(parts[1], ";", 2)[0])
	}
	cj.FillRequest(req)

	req.SetRequestURI(surl)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
	req.Header.Set("User-Agent", getRandomUserAgent())
	req.Header.Set("Accept-Encoding", "gzip")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("anti-csrftoken-a2z", token)
	req.Header.Set("Connection", "keep-alive")
	req.Header.SetMethodBytes(strPost)
	req.SetBodyString(formData)

	if err = client.DoTimeout(req, resp, 30*time.Second); err != nil {
		return "", err, true
	}

	resp.Header.VisitAllCookie(func(key, value []byte) {
		name, raw := string(key), string(value)
		// Cookies whose value is "-" are not stored.
		if strings.Contains(raw, name+"=-;") {
			return
		}
		stored := strings.Replace(raw, "Domain=.amazon", "domain=.www.amazon", -1)
		stored = strings.Replace(stored, "domain=.amazon", "domain=.www.amazon", -1)
		cookieContainer[name+u.Host] = stored
	})

	var body []byte
	if bytes.EqualFold(resp.Header.Peek("Content-Encoding"), []byte("gzip")) {
		fmt.Println("Unzipping...")
		body, err = resp.BodyGunzip()
		if err != nil {
			return "", err, true
		}
	} else {
		body = resp.Body()
	}
	return string(body), nil, false
}

0 个答案:

没有答案