首先 - 道歉。这是我编写的第一个Haskell代码。我正在编译一些直接来自Real World Haskell第24章的代码。代码使用在另一个源文件中实现的MapReduce引擎来统计文件中的行数。这是代码:
module Main where
import Control.Monad (forM_)
import Data.Int (Int64)
import qualified Data.ByteString.Lazy.Char8 as LB
import System.Environment (getArgs)
import LineChunks (chunkedReadWith)
import MapReduce (mapReduce, rnf)
-- | Count the newline characters contained in a list of lazy ByteString
-- chunks.
--
-- 'mapReduce' is given @LB.count '\n'@ as the function applied to each chunk
-- and 'sum' as the function combining the per-chunk counts; 'rdeepseq' is
-- passed as the strategy for both stages.
lineCount :: [LB.ByteString] -> Int64
lineCount chunks = mapReduce rdeepseq newlinesIn rdeepseq sum chunks
  where
    newlinesIn = LB.count '\n'
-- | For every path given on the command line, print the path followed by
-- the number of lines counted in that file.
main :: IO ()
main = getArgs >>= \paths -> forM_ paths report
  where
    -- Count one file and print the result as "<path>: <count>".
    report path =
      chunkedReadWith lineCount path >>= \numLines ->
        putStrLn (path ++ ": " ++ show numLines)
这段代码编译得很好,我得到一个LineCount.exe。
现在,我应该如何实际使用它来计算文件中的行?我有一个文件'test',其中包含一些测试文本。但是当我这样做时:
LineCount test
在命令行上,我得到:
Exception: test: hGetBufSome: illegal operation (handle is closed)
可能出现什么问题?
以下是另一个文件中的更多代码:
module LineChunks
(
chunkedReadWith
) where
import Control.Exception (bracket, evaluate, finally)
import Control.Monad (forM, liftM)
import Control.Parallel.Strategies (NFData, rdeepseq, using)
import Data.Int (Int64)
import qualified Data.ByteString.Lazy.Char8 as LB
import GHC.Conc (numCapabilities)
import System.IO
-- | Describes one chunk of a file: where it starts and how long it is.
data ChunkSpec = CS
  { chunkOffset :: !Int64  -- ^ Absolute position in the file where the chunk begins.
  , chunkLength :: !Int64  -- ^ Number of bytes belonging to the chunk.
  }
  deriving (Eq, Show)
-- | Run a pure function over the chunks of a file and return its fully
-- evaluated result.
--
-- * @chunkFunc@ computes how the file at @path@ is divided into chunks.
-- * @process@ consumes the chunk contents, supplied as lazy ByteStrings.
-- * @path@ is the file to read.
--
-- The chunks come from 'chunkedRead', which reads them lazily from one open
-- handle per chunk. The result therefore has to be forced to normal form
-- /before/ those handles are closed. Forcing @rdeepseq r@ with 'seq' is not
-- enough: that only evaluates the 'Eval' action itself to weak head normal
-- form and is not guaranteed to evaluate @r@, so the chunks could end up
-- being read after 'hClose' ("hGetBufSome: illegal operation (handle is
-- closed)"). Running the strategy with 'using' and sequencing it with
-- 'evaluate' performs the deep evaluation inside the 'bracket'.
--
-- 'bracket' guarantees that every handle returned by 'chunkedRead' is closed,
-- whether @process@ succeeds or throws while being evaluated.
withChunks :: (NFData a) =>
              (FilePath -> IO [ChunkSpec])
           -> ([LB.ByteString] -> a)
           -> FilePath
           -> IO a
withChunks chunkFunc process path =
  bracket (chunkedRead chunkFunc path) (mapM_ hClose . snd) $ \(chunks, _) ->
    -- Deep-force the result while the handles are still open.
    evaluate (process chunks `using` rdeepseq)
-- | Apply @func@ to the contents of the file at @path@, split by
-- 'lineChunks' into @numCapabilities * 4@ requested chunks, via 'withChunks'.
chunkedReadWith :: (NFData a) =>
                   ([LB.ByteString] -> a) -> FilePath -> IO a
chunkedReadWith func path = withChunks splitter func path
  where
    -- Request four chunks per capability.
    splitter = lineChunks (numCapabilities * 4)
{-- /snippet withChunks --}
{-- snippet chunkedRead --}
-- | Open the file at @path@ once per chunk reported by @chunkFunc@.
--
-- Returns the lazily-read contents of every chunk together with the handle
-- each one is read from. The handles are left open; closing them is the
-- caller's responsibility.
chunkedRead :: (FilePath -> IO [ChunkSpec])
            -> FilePath
            -> IO ([LB.ByteString], [Handle])
chunkedRead chunkFunc path = do
    specs <- chunkFunc path
    opened <- mapM openChunk specs
    return (unzip opened)
  where
    -- Open a fresh handle, seek to the chunk's start and lazily read at most
    -- 'chunkLength' bytes from there.
    openChunk spec = do
      h <- openFile path ReadMode
      hSeek h AbsoluteSeek (fromIntegral (chunkOffset spec))
      contents <- LB.hGetContents h
      return (LB.take (chunkLength spec) contents, h)
{-- /snippet chunkedRead --}
{-- snippet lineChunks --}
-- | Divide the file at @path@ into chunks that end just after a newline.
--
-- The target chunk size is the file size divided by @numChunks@. Starting
-- from each chunk's offset, the file is searched from @offset + size@ onwards
-- (in 4096-byte reads) for the next @'\n'@; the following chunk begins right
-- after it. When end of file is reached first, the current chunk extends to
-- the end of the file and is the last one.
--
-- The file is opened for the duration of the computation only and is closed
-- again by 'bracket'.
lineChunks :: Int -> FilePath -> IO [ChunkSpec]
lineChunks numChunks path =
    bracket (openFile path ReadMode) hClose $ \h -> do
      totalSize <- fmap fromIntegral (hFileSize h)
      let step = totalSize `div` fromIntegral numChunks
      chunksFrom h totalSize step 0
  where
    -- Produce the chunk beginning at @start@ followed by all later chunks.
    chunksFrom h totalSize step start = do
        let target = start + step
        hSeek h AbsoluteSeek (fromIntegral target)
        scan target
      where
        -- @pos@ is the file position of the next block to be read.
        scan pos = do
          atEnd <- hIsEOF h
          if atEnd
            then return [CS start (totalSize - start)]
            else do
              block <- LB.hGet h 4096
              case LB.elemIndex '\n' block of
                Nothing -> scan (pos + LB.length block)
                Just idx -> do
                  -- The next chunk starts on the byte after the newline.
                  rest@(next : _) <- chunksFrom h totalSize step (pos + idx + 1)
                  return (CS start (chunkOffset next - start) : rest)
{-- /snippet lineChunks --}
-- | Check that consecutive chunk specs are contiguous and non-overlapping:
-- every chunk must end exactly where its successor begins. Lists with fewer
-- than two elements trivially satisfy the property.
prop_contig :: [ChunkSpec] -> Bool
prop_contig (CS start len : rest@(CS next _ : _)) =
  start + len == next && prop_contig rest
prop_contig _ = True
答案 0 :(得分:3)
而不是
LineCount < test
使用
LineCount test
说明:对main中getArgs的调用从命令行获取args。使用“<”意味着从stdin读取。
答案 1 :(得分:2)
转到Real World Haskell随附代码中的“ch24”目录,进行以下更改并运行
ghc -O2 --make -threaded LineCount && ./LineCount LineCount.hs
然后它应该给出输出
LineCount.hs: 22
以下是必要的更改:
diff --git a/ch24/LineChunks.hs b/ch24/LineChunks.hs
index 0e82805..bda104d 100644
--- a/ch24/LineChunks.hs
+++ b/ch24/LineChunks.hs
@@ -6,7 +6,7 @@ module LineChunks
import Control.Exception (bracket, finally)
import Control.Monad (forM, liftM)
-import Control.Parallel.Strategies (NFData, rnf)
+import Control.DeepSeq(NFData,rnf)
import Data.Int (Int64)
import qualified Data.ByteString.Lazy.Char8 as LB
import GHC.Conc (numCapabilities)
diff --git a/ch24/LineCount.hs b/ch24/LineCount.hs
index c6dd40b..46218e3 100644
--- a/ch24/LineCount.hs
+++ b/ch24/LineCount.hs
@@ -7,11 +7,11 @@ import qualified Data.ByteString.Lazy.Char8 as LB
import System.Environment (getArgs)
import LineChunks (chunkedReadWith)
-import MapReduce (mapReduce, rnf)
+import MapReduce (mapReduce, rdeepseq)
lineCount :: [LB.ByteString] -> Int64
-lineCount = mapReduce rnf (LB.count '\n')
- rnf sum
+lineCount = mapReduce rdeepseq (LB.count '\n')
+ rdeepseq sum
main :: IO ()
main = do
diff --git a/ch24/MapReduce.hs b/ch24/MapReduce.hs
index d0ff90b..87c79aa 100644
--- a/ch24/MapReduce.hs
+++ b/ch24/MapReduce.hs
@@ -3,7 +3,7 @@ module MapReduce
mapReduce
, simpleMapReduce
-- exported for convenience
- , rnf
+ , rdeepseq
, rwhnf
) where
关于您遇到该错误的原因,请参阅此答案的上一版本。
答案 2 :(得分:1)
这对我有用:
module Main where
import Control.Monad (forM_)
import Data.Int (Int64)
import qualified Data.ByteString.Lazy.Char8 as LB
import System.Environment (getArgs)
import LineChunks (chunkedReadWith)
import Control.Parallel.Strategies(rdeepseq)
import MapReduce (mapReduce)
-- | Count the newline characters contained in a list of lazy ByteString
-- chunks.
--
-- 'mapReduce' is given @LB.count '\n'@ as the function applied to each chunk
-- and 'sum' as the function combining the per-chunk counts; 'rdeepseq' is
-- passed as the strategy for both stages.
lineCount :: [LB.ByteString] -> Int64
lineCount chunks = mapReduce rdeepseq newlinesIn rdeepseq sum chunks
  where
    newlinesIn = LB.count '\n'
-- | Count the lines of the file at the given path by running 'lineCount'
-- over its chunks through 'chunkedReadWith'.
lineCountFile :: FilePath -> IO Int64
lineCountFile = chunkedReadWith lineCount
我将rnf更改为rdeepseq,因为rnf似乎不再出现在“parallel”包中。
这是本书的配套代码: http://examples.oreilly.com/9780596514983/rwh-examples2.zip