我的Haskell程序在尝试解析115MB JSON文件时内存不足。我怀疑我做了一些你不应该在Haskell做的事情 - 在程序的早期阶段,由于我在String
而不是{{1}进行操作,因此内存不足} s - 但我无法弄清楚是什么。
我已将我的程序浓缩为以下MWE:
ByteString
输入格式如下:
{-# LANGUAGE GeneralizedNewtypeDeriving, OverloadedStrings, FlexibleInstances #-}
----------------------------------------
-- Imports
----------------------------------------
import System.Environment
( getArgs )
import Control.Monad
( mzero
, when
)
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.Aeson
import Data.Maybe
import Data.Scientific
( Scientific )
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
----------------------------------------
-- Data types
----------------------------------------
newtype Natural
= Natural Integer
deriving (Show, Eq, Ord)
instance Num Natural where
fromInteger = toNatural
x + y = toNatural (fromNatural x + fromNatural y)
x - y = let r = fromNatural x - fromNatural y
in if r < 0
then error "Subtraction yielded a negative value"
else toNatural r
x * y = toNatural (fromNatural x * fromNatural y)
abs x = x
signum x = toNatural $ signum $ fromNatural x
instance Enum Natural where
toEnum = toNatural . toInteger
fromEnum = fromInteger . fromNatural
instance Real Natural where
toRational (Natural i) = toRational i
instance Integral Natural where
quotRem (Natural x) (Natural y) =
( toNatural $ quot x y
, toNatural $ rem x y
)
toInteger (Natural i) = i
instance FromJSON Natural where
parseJSON (Number sn) = return $ sn2nat sn
parseJSON _ = mzero
instance ToJSON Natural where
toJSON i = toJSON (fromNatural i)
----------------------------------------
data PatternMatchset
= PatternMatchset
{ pmTarget :: TargetMachineID
, pmMatches :: [PatternMatch]
, pmTime :: Maybe Double
}
deriving (Show)
instance FromJSON PatternMatchset where
parseJSON (Object v) =
PatternMatchset
<$> v .: "target-machine-id"
<*> v .: "match-data"
<*> v .: "time"
parseJSON _ = mzero
instance ToJSON PatternMatchset where
toJSON m =
object [ "target-machine-id" .= (pmTarget m)
, "match-data" .= (pmMatches m)
, "time" .= (pmTime m)
]
----------------------------------------
data PatternMatch
= PatternMatch
{ pmInstrID :: InstructionID
, pmMatchID :: MatchID
, pmMatch :: Match NodeID
}
deriving (Show)
instance FromJSON PatternMatch where
parseJSON (Object v) =
PatternMatch
<$> v .: "instr-id"
<*> v .: "match-id"
<*> v .: "match"
parseJSON _ = mzero
instance ToJSON PatternMatch where
toJSON m =
object [ "instr-id" .= (pmInstrID m)
, "match-id" .= (pmMatchID m)
, "match" .= (pmMatch m)
]
----------------------------------------
data Match n
= Match { f2pMaps :: M.Map n [n]
, p2fMaps :: M.Map n [n]
}
deriving (Show, Eq, Ord)
instance FromJSON (Match NodeID) where
parseJSON v@(Array _) =
do list <- parseJSON v
return $ toMatch list
parseJSON _ = mzero
instance ToJSON (Match NodeID) where
toJSON m = toJSON $ fromMatch m
----------------------------------------
data Mapping n
= Mapping
{ fNode :: n
, pNode :: n
}
deriving (Show, Eq, Ord)
instance FromJSON (Mapping NodeID) where
parseJSON v@(Array _) =
do list <- parseJSON v
when (length list /= 2) mzero
return Mapping { fNode = head list
, pNode = last list
}
parseJSON _ = mzero
instance ToJSON (Mapping NodeID) where
toJSON m = Array (V.fromList [toJSON $ fNode m, toJSON $ pNode m])
----------------------------------------
newtype MatchID
= MatchID Natural
deriving (Show, Eq, Ord, Num, Enum, Real, Integral)
instance FromJSON MatchID where
parseJSON (Number sn) = return $ toMatchID $ sn2nat sn
parseJSON _ = mzero
instance ToJSON MatchID where
toJSON mid = toJSON (fromMatchID mid)
----------------------------------------
newtype NodeID
= NodeID Natural
deriving (Show, Eq, Ord, Num, Enum, Real, Integral)
instance FromJSON NodeID where
parseJSON (Number sn) = return $ toNodeID $ sn2nat sn
parseJSON _ = mzero
instance ToJSON NodeID where
toJSON mid = toJSON (fromNodeID mid)
----------------------------------------
newtype InstructionID
= InstructionID Natural
deriving (Show, Eq, Ord, Num, Enum, Real, Integral)
instance FromJSON InstructionID where
parseJSON (Number sn) = return $ toInstructionID $ sn2nat sn
parseJSON _ = mzero
instance ToJSON InstructionID where
toJSON mid = toJSON (fromInstructionID mid)
----------------------------------------
newtype TargetMachineID
= TargetMachineID String
deriving (Show, Eq)
instance FromJSON TargetMachineID where
parseJSON (String s) = return $ toTargetMachineID $ T.unpack s
parseJSON _ = mzero
instance ToJSON TargetMachineID where
toJSON tmid = toJSON (fromTargetMachineID tmid)
----------------------------------------
-- Help functions
----------------------------------------
-- | Converts an 'Integral' into a 'Natural'. If conversion fails, 'Nothing' is
-- returned.
maybeToNatural :: (Integral i) => i -> Maybe Natural
maybeToNatural x
| x < 0 = Nothing
| otherwise = Just $ Natural $ toInteger x
-- | Converts an 'Integral' into a 'Natural'. If conversion fails, an error is
-- reported.
toNatural :: (Integral i) => i -> Natural
toNatural x =
let n = maybeToNatural x
in if isJust n
then fromJust n
else error $ "toNatural: negative number: " ++
show (toInteger x :: Integer)
-- | Converts a 'Natural' into an 'Integer'.
fromNatural :: Natural -> Integer
fromNatural (Natural i) = i
-- | Converts a scientific number to a natural number. If the number is not an
-- non-negative then an error occurs.
sn2nat :: Scientific -> Natural
sn2nat sn =
let int_value = round sn
in if fromInteger int_value /= sn
then error $ "sn2nat: not an integer: " ++ show sn
else toNatural int_value
fromTargetMachineID :: TargetMachineID -> String
fromTargetMachineID (TargetMachineID i) = i
toTargetMachineID :: String -> TargetMachineID
toTargetMachineID = TargetMachineID
fromMatchID :: MatchID -> Natural
fromMatchID (MatchID i) = i
toMatchID :: (Integral i) => i -> MatchID
toMatchID = MatchID . toNatural
fromNodeID :: NodeID -> Natural
fromNodeID (NodeID i) = i
toNodeID :: (Integral i) => i -> NodeID
toNodeID = NodeID . toNatural
fromInstructionID :: InstructionID -> Natural
fromInstructionID (InstructionID i) = i
toInstructionID :: (Integral i) => i -> InstructionID
toInstructionID = InstructionID . toNatural
toMatch :: Ord n => [Mapping n] -> Match n
toMatch ms =
let insert (n1, n2) m = M.insertWith (++) n1 [n2] m
in Match { f2pMaps = foldr insert M.empty $
map (\m -> (fNode m, pNode m)) ms
, p2fMaps = foldr insert M.empty $
map (\m -> (pNode m, fNode m)) ms
}
fromMatch :: Ord n => Match n -> [Mapping n]
fromMatch m =
M.foldrWithKey
(\fn pns ms -> (ms ++ map (\pn -> Mapping { fNode = fn, pNode = pn }) pns))
[]
(f2pMaps m)
----------------------------------------
-- Main program
----------------------------------------
main :: IO ()
main =
do args <- getArgs
when (length args == 0) $
error $ "No input file"
when (length args > 1) $
error $ "Too many arguments"
let file = head args
str <- BS.readFile file
let pmset = decode str
when (isNothing pmset) $
error $ "Failed to parse JSON"
putStrLn $ BS.unpack $ encode (fromJust pmset :: PatternMatchset)
上面的程序只是解析JSON文件,将其转换回JSON并打印数据。要获得更大的输入文件,只需将对象复制粘贴到{
"match-data": [
{
"instr-id": 31,
"match": [
[2354, 5],
[2343, 3],
[2341, 10],
[2340, 9],
[1478, 8],
[1476, 6]
],
"match-id": 0
}
],
"target-machine-id": "Architecture",
"time": 27.642428397
}
列表中并将其附加到列表中即可。
我尝试使用-O2标志编译程序,但无济于事。
答案 0 :(得分:1)
尝试更改:
putStrLn $ BS.unpack $ encode (fromJust pmset :: PatternMatchset)
到
BS.putStrLn $ encode (fromJust pmset :: PatternMatchset)
前者导致我的机器进入交换地狱。后者完成得很好。