不稳定的Haskell Web客户端身份验证代码

时间:2018-01-12 23:14:43

标签: haskell curl

我在Haskell制作了这个小程序,从社交媒体网站获取访问令牌。问题在于,每次都没有工作,例如五个中的一个,从来没有从第一次尝试开始。它不是一个访问令牌,而是一个无法解析的第三种形式或进入无限循环。我还有一个可靠的Python程序(基于robobrowser)。我对这种编程很陌生,所以我不知道可能出现什么问题。

import Network.Shpider
import Network.Curl
import Data.Map
import Text.Regex.Posix
import System.Directory

getAccessToken email pass url userAgent = runShpider $ do
   let extractForm = head . forms 
       confirmForm page input = 
         let theForm = extractForm page
         in  theForm { inputs = delete input (inputs theForm) }
       pattern = "access_token=([a-zA-Z0-9]+)" 
       token pattern page =
         let matrix = (source page) =~ pattern 
         in  matrix!!0!!1
   isCookieFile <- liftIO $ doesFileExist "cookies"
   when isCookieFile $ liftIO $ removeFile "cookies"
   addCurlOpts [CurlUserAgent userAgent, CurlFollowLocation True] 
   (_, page1) <- download url
   (_, page2) <- sendForm $ fillOutForm (extractForm page1) $ pairs $ do
     "email" =: email
     "pass"  =: pass
   (_, page3) <- sendForm $ confirmForm page2 "__CANCEL__"
   return $ token pattern page3

1 个答案:

答案 0 :(得分:0)

我已经设法使用wreq中的Session来解决它

{-# LANGUAGE OverloadedStrings #-}

import Network.Wreq
import Control.Lens hiding (element, elements, children)
import Text.Regex.Posix
import Text.Taggy.Lens
import qualified Data.Text.Encoding as S
import qualified Data.Text.Lazy.Encoding as L
import Data.ByteString.Lazy 
import qualified Data.ByteString.Lazy.Char8 as B
import Data.HashMap.Lazy
import Data.Maybe 
import qualified Data.Text as T
import qualified Network.Wreq.Session as Session

getAccessToken email pass userAgent url = do 
     let opts = defaults & header "User-Agent" .~ [userAgent] 
     sess <- Session.newSession     
     page1 <- Session.getWith opts sess url
     let markup = L.decodeUtf8 $ page1 ^. responseBody
         actionUrl = T.unpack $ fromJust $ Prelude.head $ markup ^.. html . allNamed (only "form") . attributed (ix "id" . only "login_form") . attrs . at "action"
         extractedForm = markup ^.. html . allNamed (only "form") . attributed (ix "id" . only "login_form") . allNamed (only "input") . attrs 
         formParams = Prelude.map (\h -> case (h ! "name") of 
            "email"   -> S.encodeUtf8 (h ! "name") := (email :: ByteString)
            "pass"    -> S.encodeUtf8 (h ! "name") := (pass  :: ByteString)
            otherwise -> S.encodeUtf8 (h ! "name") := S.encodeUtf8 (h ! "value")) extractedForm 
     page2 <- Session.postWith opts sess actionUrl formParams
     let markup2 = L.decodeUtf8 $ page2 ^. responseBody
         actionUrl2 = "https://domain" ++ (T.unpack $ fromJust $ Prelude.head $ markup2 ^.. html . allNamed (only "form") . attrs . at "action")
         extractedForm2 = markup2 ^.. html . allNamed (only "form") . allNamed (only "input") . attrs  
         filterCancel = Prelude.filter (\h -> (h ! "name") /= "__CANCEL__") extractedForm2         
         confirmFormParams = Prelude.map (\h -> 
           if (member "value" h) then S.encodeUtf8 (h ! "name") := S.encodeUtf8 (h ! "value")
           else S.encodeUtf8 (h ! "name") := ("" :: ByteString)) filterCancel
     page3 <- Session.postWith opts sess actionUrl2 confirmFormParams 
     let pattern = "access_token=([a-zA-Z0-9]+)" :: String
         source =  B.unpack $ page3 ^. responseBody
         target = source =~ pattern :: [[String]]
     return $ target!!0!!1