使用Colly框架我无法登录Evernote帐户

时间:2018-05-29 04:19:38

标签: go web-scraping evernote

我正在使用colly框架来抓取网站。我试图登录Evernote帐户以抓取一些内容,但始终无法登录成功。我使用"username"和"password"作为字段名来提供凭证。这是正确的方法吗?

提前谢谢。

package main

import (
 "log"
 "github.com/gocolly/colly"
)

// main posts the credentials to the Evernote login endpoint with a colly
// collector, registers a response logger, and then visits the Evernote home
// page. Any error from the login POST or from the visit terminates the
// program.
func main() {
	// create a new collector
	c := colly.NewCollector()

	// authenticate
	err := c.Post("https://www.evernote.com/Login.action",
		map[string]string{
			"username": "XXXXXX@XXX.com",
			"password": "*********",
		})
	if err != nil {
		log.Fatal("Error : ", err)
	}

	// attach callbacks after login
	c.OnResponse(func(r *colly.Response) {
		log.Println("response received", r.StatusCode)
	})

	// start scraping; Visit reports request failures through its return
	// value, so it must be checked rather than dropped.
	if err := c.Visit("https://www.evernote.com/"); err != nil {
		log.Fatal("Error : ", err)
	}
}

1 个答案:

答案 0 :(得分:3)

你应该尝试模仿浏览器的行为,看看这个实现,我在每一步都添加了注释:

package evernote

import (
	"bytes"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"regexp"
	"strings"
	"time"
)

const (
	// evernoteLoginURL is the endpoint used for the initial page fetch and
	// for both form submissions of the login flow.
	evernoteLoginURL = "https://www.evernote.com/Login.action"
)

var (
	// evernoteJSParamsExpr matches inline JavaScript assignments of the form
	//   document.getElementById("<id>").value = "<value>"
	// capturing the element id (group 1) and the assigned value (group 2).
	// The captures stop at the first closing quote, so several assignments on
	// one line yield one match each instead of a single over-long capture.
	evernoteJSParamsExpr = regexp.MustCompile(`document\.getElementById\("([^"]*)"\)\.value = "([^"]*)"`)

	// evernoteRedirectExpr captures the href of the anchor that follows the
	// text "Redirecting to". The capture stops at the closing quote of the
	// attribute so later markup on the same line is not included.
	evernoteRedirectExpr = regexp.MustCompile(`Redirecting to <a href="([^"]*)">`)

	// errNoMatches is returned when no JavaScript parameter assignment is found.
	errNoMatches = errors.New("No matches")
	// errRedirectURL is returned when the redirect anchor is not found.
	errRedirectURL = errors.New("Redirect URL not found")
)

// EvernoteClient wraps all methods required to interact with the website.
type EvernoteClient struct {
    Username   string
    Password   string
    httpClient *http.Client

    // These parameters persist during the login process:
    hpts  string
    hptsh string
}

// NewEvernoteClient initializes a new Evernote client for the given
// credentials. The returned client owns an http.Client with its own cookie
// jar, so cookies set during Login are sent on later requests made through
// the same client.
func NewEvernoteClient(username, password string) *EvernoteClient {
	// requestTimeout bounds each HTTP request made by the client. The
	// previous value was copied from http.DefaultClient, whose Timeout is
	// zero (no timeout), so a stalled server would block the caller forever.
	const requestTimeout = 30 * time.Second

	// Allocate a new cookie jar to mimic the browser behavior. The error is
	// deliberately ignored: this constructor has no error return, and
	// cookiejar.New is not expected to fail for nil options.
	// NOTE(review): confirm against the cookiejar documentation.
	cookieJar, _ := cookiejar.New(nil)

	return &EvernoteClient{
		Username: username,
		Password: password,
		// Mirror the default client's transport and redirect policy, and
		// attach the cookie jar created above.
		httpClient: &http.Client{
			Transport:     http.DefaultTransport,
			CheckRedirect: http.DefaultClient.CheckRedirect,
			Jar:           cookieJar,
			Timeout:       requestTimeout,
		},
	}
}

// extractJSParams scans body for the JavaScript assignments matched by
// evernoteJSParamsExpr and stores the values assigned to the "hpts" and
// "hptsh" elements in e.hpts and e.hptsh.
//
// It returns errNoMatches when body contains no matching assignment, or when
// a match lacks the two expected capture groups. Assignments to other element
// ids are ignored.
//
// NOTE(review): the element ids "hpts" and "hptsh" are presumed to equal the
// form field names that Login submits; the identifiers this code previously
// compared against (hptsKey, hptshKey) were not declared anywhere in the
// file. Confirm against the live login page.
func (e *EvernoteClient) extractJSParams(body []byte) error {
	matches := evernoteJSParamsExpr.FindAllSubmatch(body, -1)
	if len(matches) == 0 {
		return errNoMatches
	}
	for _, submatches := range matches {
		// Each match must carry the full match plus two capture groups.
		// Report the problem instead of discarding it.
		if len(submatches) < 3 {
			return errNoMatches
		}
		val := string(submatches[2])
		switch string(submatches[1]) {
		case "hpts":
			e.hpts = val
		case "hptsh":
			e.hptsh = val
		}
	}
	return nil
}

// Login handles the login action. It performs three requests against
// evernoteLoginURL through e.httpClient:
//
//  1. GET the login page and extract the "hpts"/"hptsh" parameters.
//  2. POST the username only (with "evaluateUsername") and verify that the
//     response advertises password authentication.
//  3. POST the username and password, then look for the redirect URL in the
//     response body.
//
// On success it prints the redirect URL and returns nil. It returns the
// underlying error for any transport or read failure, errRedirectURL when the
// final response contains no redirect link, and a descriptive error when
// password authentication is not offered. Every response body is closed
// before Login returns.
func (e *EvernoteClient) Login() error {
	// First step: fetch the login page as a browser visitor would do:
	res, err := e.httpClient.Get(evernoteLoginURL)
	if err != nil {
		return err
	}
	body, err := readResponseBody(res)
	if err != nil {
		return err
	}
	if err = e.extractJSParams(body); err != nil {
		return err
	}

	// Second step: we have extracted the "hpts" and "hptsh" parameters.
	// We send a request using only the username and setting "evaluateUsername":
	values := url.Values{}
	values.Set("username", e.Username)
	values.Set("evaluateUsername", "")
	values.Set("analyticsLoginOrigin", "login_action")
	values.Set("clipperFlow", "false")
	values.Set("showSwitchService", "true")
	values.Set("hpts", e.hpts)
	values.Set("hptsh", e.hptsh)

	body, err = e.postLoginForm(values, "application/json")
	if err != nil {
		return err
	}
	if !strings.Contains(string(body), `"usePasswordAuth":true`) {
		return errors.New("Password auth not enabled")
	}

	// Third step: do the final request, append password to form data:
	values.Del("evaluateUsername")
	values.Set("password", e.Password)
	values.Set("login", "Sign in")

	body, err = e.postLoginForm(values, "text/html")
	if err != nil {
		return err
	}

	// Check the body in order to find the redirect URL:
	matches := evernoteRedirectExpr.FindAllStringSubmatch(string(body), -1)
	if len(matches) == 0 {
		return errRedirectURL
	}
	m := matches[0]
	if len(m) < 2 {
		return errRedirectURL
	}
	redirectURL := m[1]
	fmt.Println("Login is ok, redirect URL:", redirectURL)
	return nil
}

// postLoginForm submits values as a URL-encoded form to evernoteLoginURL with
// the given Accept header plus the XMLHttpRequest and referer headers, and
// returns the complete response body.
func (e *EvernoteClient) postLoginForm(values url.Values, accept string) ([]byte, error) {
	req, err := http.NewRequest(http.MethodPost, evernoteLoginURL, bytes.NewBufferString(values.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Accept", accept)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
	req.Header.Set("x-requested-with", "XMLHttpRequest")
	req.Header.Set("referer", evernoteLoginURL)

	res, err := e.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	return readResponseBody(res)
}

// readResponseBody reads res.Body to the end and closes it, so the
// underlying connection is released instead of leaked.
func readResponseBody(res *http.Response) ([]byte, error) {
	if res.Body == nil {
		return nil, errors.New("No response body")
	}
	defer res.Body.Close()
	return ioutil.ReadAll(res.Body)
}

成功获取重定向URL后,只要您继续使用用于登录过程的HTTP客户端,就应该能够发送经过身份验证的请求,cookie jar在此处起着非常重要的作用。

要调用此代码,请使用:

// main shows how to use the client: build it with the account credentials,
// run the login flow, and panic if Login reports an error.
func main() {
	client := NewEvernoteClient("user@company", "password")
	if loginErr := client.Login(); loginErr != nil {
		panic(loginErr)
	}
}