我在Haskell制作了这个小程序,从社交媒体网站获取访问令牌。问题在于,每次都没有工作,例如五个中的一个,从来没有从第一次尝试开始。它不是一个访问令牌,而是一个无法解析的第三种形式或进入无限循环。我还有一个可靠的Python程序(基于robobrowser)。我对这种编程很陌生,所以我不知道可能出现什么问题。
import Network.Shpider
import Network.Curl
import Data.Map
import Text.Regex.Posix
import System.Directory
getAccessToken email pass url userAgent = runShpider $ do
let extractForm = head . forms
confirmForm page input =
let theForm = extractForm page
in theForm { inputs = delete input (inputs theForm) }
pattern = "access_token=([a-zA-Z0-9]+)"
token pattern page =
let matrix = (source page) =~ pattern
in matrix!!0!!1
isCookieFile <- liftIO $ doesFileExist "cookies"
when isCookieFile $ liftIO $ removeFile "cookies"
addCurlOpts [CurlUserAgent userAgent, CurlFollowLocation True]
(_, page1) <- download url
(_, page2) <- sendForm $ fillOutForm (extractForm page1) $ pairs $ do
"email" =: email
"pass" =: pass
(_, page3) <- sendForm $ confirmForm page2 "__CANCEL__"
return $ token pattern page3
答案 0 :(得分:0)
我已经设法使用wreq中的Session来解决它
{-# LANGUAGE OverloadedStrings #-}
import Network.Wreq
import Control.Lens hiding (element, elements, children)
import Text.Regex.Posix
import Text.Taggy.Lens
import qualified Data.Text.Encoding as S
import qualified Data.Text.Lazy.Encoding as L
import Data.ByteString.Lazy
import qualified Data.ByteString.Lazy.Char8 as B
import Data.HashMap.Lazy
import Data.Maybe
import qualified Data.Text as T
import qualified Network.Wreq.Session as Session
getAccessToken email pass userAgent url = do
let opts = defaults & header "User-Agent" .~ [userAgent]
sess <- Session.newSession
page1 <- Session.getWith opts sess url
let markup = L.decodeUtf8 $ page1 ^. responseBody
actionUrl = T.unpack $ fromJust $ Prelude.head $ markup ^.. html . allNamed (only "form") . attributed (ix "id" . only "login_form") . attrs . at "action"
extractedForm = markup ^.. html . allNamed (only "form") . attributed (ix "id" . only "login_form") . allNamed (only "input") . attrs
formParams = Prelude.map (\h -> case (h ! "name") of
"email" -> S.encodeUtf8 (h ! "name") := (email :: ByteString)
"pass" -> S.encodeUtf8 (h ! "name") := (pass :: ByteString)
otherwise -> S.encodeUtf8 (h ! "name") := S.encodeUtf8 (h ! "value")) extractedForm
page2 <- Session.postWith opts sess actionUrl formParams
let markup2 = L.decodeUtf8 $ page2 ^. responseBody
actionUrl2 = "https://domain" ++ (T.unpack $ fromJust $ Prelude.head $ markup2 ^.. html . allNamed (only "form") . attrs . at "action")
extractedForm2 = markup2 ^.. html . allNamed (only "form") . allNamed (only "input") . attrs
filterCancel = Prelude.filter (\h -> (h ! "name") /= "__CANCEL__") extractedForm2
confirmFormParams = Prelude.map (\h ->
if (member "value" h) then S.encodeUtf8 (h ! "name") := S.encodeUtf8 (h ! "value")
else S.encodeUtf8 (h ! "name") := ("" :: ByteString)) filterCancel
page3 <- Session.postWith opts sess actionUrl2 confirmFormParams
let pattern = "access_token=([a-zA-Z0-9]+)" :: String
source = B.unpack $ page3 ^. responseBody
target = source =~ pattern :: [[String]]
return $ target!!0!!1