目前我的抓取工具是通过邮政编码完成它的工作的,我如何将其更改为按国家/地区抓取?
这是代码:https://pastebin.com/4Q6m8RPR
我需要在cookie中保存“国家”,而目前我只保存了“邮政编码”。它有两个功能:访问网站,然后“捕获”cookie。
// ChangeZipCode posts zipCode to the address-change endpoint of the Amazon
// marketplace that purl belongs to.
//
// It resets the package-level cookieContainer and then issues, in order: a GET
// of the marketplace home page, a POST to the ajax counter URL found on that
// page, a GET of the address-selection page (which carries the CSRF token), the
// address-change POST, and a final location-label POST.
//
// The function has no return value; if either page fetch fails the error is
// logged and the remaining requests are skipped, because they cannot succeed
// without the data scraped from those pages.
func ChangeZipCode(purl string, proxies []dal.Proxy, zipCode string) {
	cookieContainer = make(map[string]string)

	// The parsed URL is only logged; marketplace detection works on the raw
	// string, so a parse failure is reported but is not fatal.
	u, err := url.Parse(purl)
	if err != nil {
		logger.Println(err)
	} else {
		logger.Println(u)
	}

	// Escape the zip code: values such as UK postcodes contain spaces, which
	// would otherwise corrupt the form-urlencoded body.
	formData := "locationType=LOCATION_INPUT&zipCode=" + url.QueryEscape(zipCode) + "&storeContext=generic&deviceType=web&pageType=Gateway&actionSource=glow&almBrandId=undefined"

	domainFix, searchTxt := amazonMarketplace(purl)
	baseURL := "https://www.amazon." + domainFix

	homePage, err := getRequest(baseURL, proxies)
	if err != nil {
		logger.Println(err)
		return
	}

	// The home page embeds the query suffix of the ajax counter URL; replay it
	// with a POST before asking for the address-selection page.
	uidCookie := GetStringInBetweenTwoString(homePage, "/ah/ajax/counter?ctr=desktop_ajax_atf", "\" })</script>")
	uidCookieUrl := baseURL + "/ah/ajax/counter?ctr=desktop_ajax_atf" + uidCookie
	postRequest(uidCookieUrl, proxies, "")

	tokenPage, err := getRequest(baseURL+"/gp/glow/get-address-selections.html?deviceType=desktop&pageType=Gateway&storeContext=NoStoreName", proxies)
	if err != nil {
		logger.Println(err)
		return
	}

	// The CSRF token sits between the localized label and the IDs object.
	crosToken := GetStringInBetweenTwoString(tokenPage, searchTxt+", CSRF_TOKEN : \"", "\", IDs:{\"ADDRESS_LIST\":\"GLUXAddressList\"")

	changeZipCodePostRequest(baseURL+"/gp/delivery/ajax/address-change.html", proxies, formData, crosToken)
	postRequest(baseURL+"/gp/glow/get-location-label.html", proxies, "storeContext=hpc&pageType=Landing")
}

// amazonMarketplace returns the domain suffix (the part after "www.amazon.")
// and the quoted, localized label that precedes the CSRF token on the
// address-selection page for the marketplace referenced by purl. URLs that
// match none of the known marketplaces fall back to amazon.com.
func amazonMarketplace(purl string) (domainFix, searchTxt string) {
	switch {
	case strings.Contains(purl, "www.amazon.es"):
		return "es", "\"Tu dirección de envío actual es:\""
	case strings.Contains(purl, "www.amazon.de"):
		return "de", "\"Sie kaufen gerade ein für:\""
	case strings.Contains(purl, "www.amazon.fr"):
		return "fr", "\"Votre lieu de livraison est désormais:\""
	case strings.Contains(purl, "www.amazon.co.uk"):
		return "co.uk", "\"You're now shopping for delivery to:\""
	case strings.Contains(purl, "www.amazon.it"):
		return "it", "\"L'indirizzo di consegna selezionato è:\""
	default:
		return "com", "\"You're now shopping for delivery to:\""
	}
}
// changeZipCodePostRequest POSTs formData to surl with the anti-CSRF token
// header set to token.
//
// Cookies stored in the package-level cookieContainer whose key contains the
// host of surl are sent with the request, and cookies set by the response are
// written back into cookieContainer under the key "<cookie name><host>".
// When proxies is non-empty the request goes through a randomly chosen proxy.
//
// It returns the response body (gunzipped when the response declares gzip
// content encoding), an error, and a flag that is true exactly when the error
// is non-nil.
func changeZipCodePostRequest(surl string, proxies []dal.Proxy, formData string, token string) (string, error, bool) {
	logger.Println("processing", surl)

	// Parse up front: the host is needed both to pick the cookies to send and
	// to key the cookies received, so there is nothing useful to do without it.
	u, err := url.Parse(surl)
	if err != nil {
		return "", fmt.Errorf("parsing request url %q: %w", surl, err), true
	}

	var client fasthttp.Client
	if len(proxies) > 0 {
		px := getRandomProxy(proxies)
		client.Dial = proxy.FastHTTPProxyDialer(px)
		logger.Println("with proxy", px)
	}
	defer client.CloseIdleConnections()

	req := fasthttp.AcquireRequest()
	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(req)
	defer fasthttp.ReleaseResponse(resp)

	// Rebuild the cookie jar from the cookies previously captured for this host.
	cj = cookiejar.AcquireCookieJar()
	for key, value := range cookieContainer {
		if !strings.Contains(key, u.Host) {
			continue
		}
		name := strings.Replace(key, u.Host, "", -1)
		// Entries are the raw cookie strings stored by the response handling
		// below ("name=value; attr=..."). Split on the first "=" only so that
		// values containing "=" are not truncated, and skip malformed entries
		// instead of indexing out of range.
		parts := strings.SplitN(value, "=", 2)
		if len(parts) < 2 {
			continue
		}
		cj.Set(name, strings.SplitN(parts[1], ";", 2)[0])
	}
	cj.FillRequest(req)

	req.SetRequestURI(surl)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
	req.Header.Set("User-Agent", getRandomUserAgent())
	req.Header.Set("Accept-Encoding", "gzip")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("anti-csrftoken-a2z", token)
	req.Header.Set("Connection", "keep-alive")
	req.Header.SetMethodBytes(strPost)
	req.SetBodyString(formData)

	if err := client.DoTimeout(req, resp, 30*time.Second); err != nil {
		return "", err, true
	}

	// Capture the cookies set by the response for later requests to this host.
	resp.Header.VisitAllCookie(func(key, value []byte) {
		name, raw := string(key), string(value)
		// Skip cookies whose value is the "-" placeholder.
		if strings.Contains(raw, name+"=-;") {
			return
		}
		// Rewrite the cookie domain from ".amazon…" to ".www.amazon…" in
		// either capitalization of the attribute name.
		raw = strings.Replace(raw, "Domain=.amazon", "domain=.www.amazon", -1)
		raw = strings.Replace(raw, "domain=.amazon", "domain=.www.amazon", -1)
		cookieContainer[name+u.Host] = raw
	})

	if bytes.EqualFold(resp.Header.Peek("Content-Encoding"), []byte("gzip")) {
		logger.Println("Unzipping...")
		body, err := resp.BodyGunzip()
		if err != nil {
			return "", fmt.Errorf("gunzipping response from %q: %w", surl, err), true
		}
		return string(body), nil, false
	}
	return string(resp.Body()), nil, false
}