如何用另一个字节串(例如 "\r\n")作为分隔符来拆分一个惰性(lazy)字节串?我正在寻找具有以下类型的函数:
BSL.ByteString -> BSL.ByteString -> [BSL.ByteString]
我知道有 breakSubstring,但该函数仅适用于严格(strict)字节串。我也看过这个问题(question),但其中的解决方案同样使用的是严格字节串。
答案 0 :(得分:1)
回答我自己的问题:我创建了一个 pull request,将 breakSubstring 添加到 Data.ByteString.Lazy(改编自严格版本)。
在合并拉取请求之前,可以使用以下代码:
{-# LANGUAGE BangPatterns #-}
module Lib (breakSubstring) where
import Data.Bits (finiteBitSize, shiftL, (.|.), (.&.))
import Data.Word (Word32)
import Prelude
import qualified Data.ByteString.Lazy as BSL
-- | Break a lazy 'BSL.ByteString' at the first occurrence of a pattern.
--
-- @breakSubstring pat src@ returns @(before, rest)@ where @rest@ starts with
-- the first occurrence of @pat@ in @src@ and @before@ is everything in front
-- of it. When @pat@ does not occur, the result is @(src, 'BSL.empty')@. An
-- empty pattern matches immediately, giving @('BSL.empty', src)@.
--
-- The input is consumed incrementally: only as much of @src@ is forced as is
-- needed to find the first match, so the search also terminates on an
-- unbounded input that contains the pattern.
breakSubstring
  :: BSL.ByteString -- ^ pattern to search for
  -> BSL.ByteString -- ^ input to search in
  -> (BSL.ByteString, BSL.ByteString)
breakSubstring pat =
  case lp of
    0 -> \src -> (BSL.empty, src)
    1 -> BSL.break (== BSL.head pat)
    _ | lp * 8 <= fromIntegral (finiteBitSize (0 :: Word)) -> shift
      | otherwise -> karpRabin
  where
    lp = BSL.length pat

    -- Rolling-hash search, used when the pattern does not fit in one 'Word'.
    karpRabin :: BSL.ByteString -> (BSL.ByteString, BSL.ByteString)
    karpRabin src
      -- Only the first @lp@ bytes are inspected here, so the rest of the
      -- input stays unevaluated.
      | BSL.length window < lp = (src, BSL.empty)
      | otherwise = search (rollingHash window) 0 src rest
      where
        (window, rest) = BSL.splitAt lp src
        k = 2891336453 :: Word32
        rollingHash = BSL.foldl' (\h b -> h * k + fromIntegral b) 0
        hp = rollingHash pat
        -- Weight carried by the byte leaving the window once the hash has
        -- been multiplied by @k@ for the incoming byte.
        m = k ^ lp
        -- Invariants: @old == BSL.drop start src@ (the current window starts
        -- at its head) and @new == BSL.drop (start + lp) src@ (the next byte
        -- to enter the window is at its head).
        search !hs !start old new
          -- A hash hit is confirmed by a real comparison to rule out
          -- collisions.
          | hp == hs && pat == BSL.take lp old = BSL.splitAt start src
          | otherwise =
              case (BSL.uncons old, BSL.uncons new) of
                (Just (outgoing, old'), Just (incoming, new')) ->
                  search
                    (hs * k + fromIntegral incoming - m * fromIntegral outgoing)
                    (start + 1)
                    old'
                    new'
                -- No byte left to slide the window over: not found.
                _ -> (src, BSL.empty)
    {-# INLINE karpRabin #-}

    -- Search for patterns short enough to be packed into a single 'Word':
    -- the current window is kept as a word and compared directly.
    shift :: BSL.ByteString -> (BSL.ByteString, BSL.ByteString)
    shift src
      | BSL.length window < lp = (src, BSL.empty)
      | otherwise = search (intoWord window) 0 rest
      where
        (window, rest) = BSL.splitAt lp src
        intoWord :: BSL.ByteString -> Word
        intoWord = BSL.foldl' (\w b -> (w `shiftL` 8) .|. fromIntegral b) 0
        wp = intoWord pat
        -- Keeps only the low @8 * lp@ bits, i.e. the last @lp@ bytes.
        mask = (1 `shiftL` fromIntegral (8 * lp)) - 1
        -- Invariant: @new == BSL.drop (start + lp) src@.
        search !w !start new
          | w == wp = BSL.splitAt start src
          | otherwise =
              case BSL.uncons new of
                Just (b, new') ->
                  search
                    (mask .&. ((w `shiftL` 8) .|. fromIntegral b))
                    (start + 1)
                    new'
                -- Input exhausted without a match.
                Nothing -> (src, BSL.empty)
    {-# INLINE shift #-}