Haskell的Network.Browser模块是否类似于Perl的LWP或Python的mechanize?

时间:2010-09-14 07:25:33

标签: haskell networking

Network.Browser文档说该模块支持以下内容:

  • HTTP身份验证处理
  • 透明处理重定向
  • Cookie存储+传输。
  • 事务日志记录
  • 通过代理中转的连接。

对我而言,这听起来像是一个浏览器的雏形,它可以让我抓取网页,处理网站的身份验证、cookie等。

但是,该模块没有附带任何示例代码、说明或教程,我无法弄清楚如何使用它。

有人可以举例说明如何用它来:1)访问网站,2)登录,3)下载需要登录才能获取的文件吗?

1 个答案:

答案 0 :(得分:3)

我建议你看看Network.Curl。

要回答您的问题,以下是一个示例(取自http://haskell.pastebin.com/9kPiGxiH):

import Data.IORef
import Network.HTTP
import Network.Browser
import Network.URI
import Data.Maybe
import Control.Monad
import Data.List
import Text.Regex.TDFA
import Control.Concurrent

-- | Build the search-results URL on www.interpals.net for the page that
-- starts at the given offset. The offset is rendered with 'show' and appended
-- to the fixed query string as the value of the @offset@ parameter.
pageUrl :: Show a => a -> URI
pageUrl off = URI
    { uriScheme    = "http:"
    , uriAuthority = Just (URIAuth "" "www.interpals.net" "")
    , uriPath      = "/dosearch.php"
    , uriQuery     = searchQuery ++ show off
    , uriFragment  = ""
    }
  where
    -- Fixed search filter; split only for readability, the pieces concatenate
    -- to a single query string that ends in "offset=".
    searchQuery = concat
        [ "?todo=search&sec=adv&age1=15&age2=18&sex[]=FEMALE"
        , "&lfor[]=lfor_email&lfor[]=lfor_snail&lfor[]=lfor_langex"
        , "&lfor[]=lfor_friend&lfor[]=lfor_flirt&lfor[]=lfor_relation"
        , "&countries[]=AT&countries[]=DE&countries[]=CH"
        , "&state=&languages[]=any&keywords="
        , "&sort=p.last_login+DESC&offset="
        ]

-- | Issue a GET request for the given URI inside a browser session and return
-- the response body.
--
-- Both the error handler and the output handler of the browser are replaced
-- with no-ops before the request is sent, and a fixed @Cookie@ header is
-- attached to the request.
getPage :: URI -> BrowserAction (HandleStream [Char]) String
getPage uri = do
    setErrHandler silent
    setOutHandler silent
    (_, response) <- request (Request uri GET [sessionCookie] "")
    return (rspBody response)
  where
    -- Handler that ignores its argument and does nothing.
    silent _ = return ()

    -- NOTE(review): this looks like a captured login session (PHPSESSID and
    -- auto-login fields) hard-coded into the source -- confirm whether it
    -- should be supplied from outside instead.
    sessionCookie = Header HdrCookie "__ubic1=MTE3ODM0NDM0MTRjN2RkYTA1OTAzMmU4LjkxODE1Njk2; __utma=46363135.421215970.1283316265.1283538085.1283541700.10; __utmz=46363135.1283316265.1.1.utmccn=(direct)|utmcsr=(direct)|utmcmd=(none); __utmc=46363135; PHPSESSID=59a130c66d4853f85289852f15cefa3a; resolution=1920x1080; ip_auto_login[login]=cap11235; ip_auto_login[password_md5]=NDM0NWM0NDlkZTg4MjRkMWVhZmJmZWNiZTQwOWQ4YTE%3D; __utmb=46363135"

-- | Fetch the search-results page that starts at offset @off@ and extract the
-- names that appear in links of the form @\<a href='/NAME?...@.
--
-- Returns the de-duplicated names (in order of first appearance) together
-- with the next offset to request: @off + 10@ when the page contained at
-- least one match, or @0@ when it contained none.
getPeople :: Int -> BrowserAction (HandleStream [Char]) ([String], Int)
getPeople off = do
    body <- getPage (pageUrl off)
    let matches = (body =~ "<a href='/([^?.]+)\\?") :: [[String]]
        -- Each match is expected to be [whole match, first capture group].
        -- Pattern matching picks out the capture and skips any malformed
        -- entry instead of crashing the way an unchecked (!! 1) would.
        names = [name | (_ : name : _) <- matches]
        -- 'null' only inspects the first cons cell, unlike 'length'.
        next = if null matches then 0 else off + 10
    return (nub names, next)

-- | Turn a name into the URI @http://www.interpals.net/NAME@.
--
-- Partial: the concatenated string is parsed with 'parseURI' and unwrapped
-- with 'fromJust', so a name that makes the string unparseable as a URI
-- raises an exception when the result is forced.
personUrl :: String -> URI
personUrl = fromJust . parseURI . ("http://www.interpals.net/" ++)

-- | Request the page for the given name and throw the response body away;
-- the action is run only for the effect of making the request.
viewPerson :: String -> BrowserAction (HandleStream [Char]) ()
viewPerson = void . getPage . personUrl

-- | One step of the endless crawl loop, followed by a recursive call.
--
-- The 'IORef' holds @(names seen so far, current search offset)@. Each step
-- reads that state, fetches one page of names, forks one thread per name to
-- request that person's page, stores the updated state, prints the running
-- total, and starts over. The stored offset wraps back to 0 once the next
-- offset reaches 2000. This function never returns.
doCycle :: IORef (Int, Int) -> IO ()
doCycle r = do
    (seen, offset) <- readIORef r
    (names, nextOffset) <- browse (getPeople offset)
    -- Fire-and-forget: the forked threads are not waited on.
    forM_ names $ \name -> forkIO (browse (viewPerson name))
    let total = seen + length names
        wrappedOffset
            | nextOffset < 2000 = nextOffset
            | otherwise         = 0
    writeIORef r (total, wrappedOffset)
    print total
    doCycle r

-- | Entry point: create the loop state with a count of 0 and a search offset
-- of 0, then hand it to 'doCycle'.
main :: IO ()
main = newIORef (0, 0) >>= doCycle