我正在使用colly框架来删除网站。我试图登录Evernote帐户以废弃一些东西。但我无法通过它。我使用"用户名"和#34;密码"提供凭证的标题。这是正确的方法吗?。
提前谢谢。
package main
import (
"log"
"github.com/gocolly/colly"
)
func main() {
// create a new collector
c := colly.NewCollector()
// authenticate
err := c.Post("https://www.evernote.com/Login.action",
map[string]string{"username":
"XXXXXX@XXX.com", "password": "*********"})
if err != nil {
log.Fatal("Error : ",err)
}
// attach callbacks after login
c.OnResponse(func(r *colly.Response) {
log.Println("response received", r.StatusCode)
})
// start scraping
c.Visit("https://www.evernote.com/")
}
答案 0 :(得分:3)
你应该尝试模仿浏览器的行为,看看这个实现,我在每一步都添加了评论:
package evernote
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"net/http"
"net/http/cookiejar"
"net/url"
"regexp"
"strings"
)
const (
evernoteLoginURL = "https://www.evernote.com/Login.action"
)
var (
evernoteJSParamsExpr = regexp.MustCompile(`document.getElementById\("(.*)"\).value = "(.*)"`)
evernoteRedirectExpr = regexp.MustCompile(`Redirecting to <a href="(.*)">`)
errNoMatches = errors.New("No matches")
errRedirectURL = errors.New("Redirect URL not found")
)
// EvernoteClient wraps all methods required to interact with the website.
type EvernoteClient struct {
Username string
Password string
httpClient *http.Client
// These parameters persist during the login process:
hpts string
hptsh string
}
// NewEvernoteClient initializes a new Evernote client.
func NewEvernoteClient(username, password string) *EvernoteClient {
// Allocate a new cookie jar to mimic the browser behavior:
cookieJar, _ := cookiejar.New(nil)
// Fill up basic data:
c := &EvernoteClient{
Username: username,
Password: password,
}
// When initializing the http.Client, copy default values from http.DefaultClient
// Pass a pointer to the cookie jar that was created earlier:
c.httpClient = &http.Client{
Transport: http.DefaultTransport,
CheckRedirect: http.DefaultClient.CheckRedirect,
Jar: cookieJar,
Timeout: http.DefaultClient.Timeout,
}
return c
}
func (e *EvernoteClient) extractJSParams(body []byte) (err error) {
matches := evernoteJSParamsExpr.FindAllSubmatch(body, -1)
if len(matches) == 0 {
return errNoMatches
}
for _, submatches := range matches {
if len(submatches) < 3 {
err = errNoMatches
break
}
key := submatches[1]
val := submatches[2]
if bytes.Compare(key, hptsKey) == 0 {
e.hpts = string(val)
}
if bytes.Compare(key, hptshKey) == 0 {
e.hptsh = string(val)
}
}
return nil
}
// Login handles the login action.
func (e *EvernoteClient) Login() error {
// First step: fetch the login page as a browser visitor would do:
res, err := e.httpClient.Get(evernoteLoginURL)
if err != nil {
return err
}
if res.Body == nil {
return errors.New("No response body")
}
body, err := ioutil.ReadAll(res.Body)
if err != nil {
return err
}
err = e.extractJSParams(body)
if err != nil {
return err
}
// Second step: we have extracted the "hpts" and "hptsh" parameters
// We send a request using only the username and setting "evaluateUsername":
values := &url.Values{}
values.Set("username", e.Username)
values.Set("evaluateUsername", "")
values.Set("analyticsLoginOrigin", "login_action")
values.Set("clipperFlow", "false")
values.Set("showSwitchService", "true")
values.Set("hpts", e.hpts)
values.Set("hptsh", e.hptsh)
rawValues := values.Encode()
req, err := http.NewRequest(http.MethodPost, evernoteLoginURL, bytes.NewBufferString(rawValues))
if err != nil {
return err
}
req.Header.Set("Accept", "application/json")
req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
req.Header.Set("x-requested-with", "XMLHttpRequest")
req.Header.Set("referer", evernoteLoginURL)
res, err = e.httpClient.Do(req)
if err != nil {
return err
}
body, err = ioutil.ReadAll(res.Body)
if err != nil {
return err
}
bodyStr := string(body)
if !strings.Contains(bodyStr, `"usePasswordAuth":true`) {
return errors.New("Password auth not enabled")
}
// Third step: do the final request, append password to form data:
values.Del("evaluateUsername")
values.Set("password", e.Password)
values.Set("login", "Sign in")
rawValues = values.Encode()
req, err = http.NewRequest(http.MethodPost, evernoteLoginURL, bytes.NewBufferString(rawValues))
if err != nil {
return err
}
req.Header.Set("Accept", "text/html")
req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
req.Header.Set("x-requested-with", "XMLHttpRequest")
req.Header.Set("referer", evernoteLoginURL)
res, err = e.httpClient.Do(req)
if err != nil {
return err
}
// Check the body in order to find the redirect URL:
body, err = ioutil.ReadAll(res.Body)
if err != nil {
return err
}
bodyStr = string(body)
matches := evernoteRedirectExpr.FindAllStringSubmatch(bodyStr, -1)
if len(matches) == 0 {
return errRedirectURL
}
m := matches[0]
if len(m) < 2 {
return errRedirectURL
}
redirectURL := m[1]
fmt.Println("Login is ok, redirect URL:", redirectURL)
return nil
}
成功获取重定向URL后,只要您继续使用用于登录过程的HTTP客户端,就应该能够发送经过身份验证的请求,cookie jar在此处起着非常重要的作用。
要调用此代码,请使用:
func main() {
evernoteClient := NewEvernoteClient("user@company", "password")
err := evernoteClient.Login()
if err != nil {
panic(err)
}
}