我在Haskell玩Channels。我希望并行执行一些IO
操作,直到其中一个失败,然后将所有结果收集到列表中。
此代码错误Exception <<loop>>
。如何让它与getChanContents
一起使用?我见过的所有例子都假设他们知道频道上有多少消息。
是否有更简洁的方法从工人那里收集大量结果?
module UrlPatterns where
import Control.Concurrent
import Types
import Text.HTML.Scalpel
import Data.Monoid ((<>))
import Control.Concurrent.Chan
import Control.Applicative
import Data.Maybe (isJust, catMaybes)
import Data.List (takeWhile)
-- find all valid links under a domain that follow the pattern:
-- http://example.com/pages/(1..N)
-- as soon as one is missing, return a list of all the ones you found
findIncrementing :: URL -> IO [Link]
findIncrementing base = do
let num = 1
-- find channel
cfind <- newChan
writeChan cfind (base, num)
-- results channel
cdone <- newChan
forkIO $ worker cfind cdone
-- collect the results
results <- getChanContents cdone
let results = takeWhile isJust results :: [Maybe Link]
print results
return []
worker :: Chan (URL, Int) -> Chan (Maybe Link) -> IO ()
worker next done = loop
where
loop = do
(base, num) <- readChan next
let url = pageUrl base num
putStrLn $ "FETCHING: " <> url
mt <- findPageTitle url
case mt of
Nothing -> do
writeChan done Nothing
putStrLn ("Missed " <> show num)
Just t -> do
writeChan done $ Just $ Link url t
writeChan next (base, num+1)
loop
scrapeTitle :: Scraper String String
scrapeTitle = text "title"
findPageTitle :: URL -> IO (Maybe String)
findPageTitle url = scrapeURL url scrapeTitle
pageUrl :: URL -> Int -> URL
pageUrl base num = base <> show num
答案 0 :(得分:2)
感谢@bartavelle。我有一个与频道代码无关的错误。这是相关的修复:
-- collect the results
results <- getChanContents cdone
let links = catMaybes $ takeWhile isJust results
return links