我开始学习FParsec。它有一种非常灵活的方式来解析数字;我可以提供一组我想要使用的数字格式:
/// A parsed number literal, tagged with the notation it was written in.
type Number =
/// Plain integer literal, e.g. `2`.
| Numeral of int
/// Literal with a fractional part, e.g. `2.0`.
| Decimal of float
/// Integer written in hexadecimal notation.
| Hexadecimal of int
/// Integer written in binary notation.
| Binary of int
/// The set of literal notations accepted by `numberLiteral`:
/// binary and hexadecimal integers, plus numbers with a fractional part.
let numberFormat =
    NumberLiteralOptions.AllowBinary
    ||| NumberLiteralOptions.AllowHexadecimal
    ||| NumberLiteralOptions.AllowFraction
/// Parses a number literal in any notation enabled by `numberFormat` and
/// converts it to the matching `Number` case.
/// The `int` / `float` conversions raise an exception if the text cannot be
/// converted (for example on overflow).
let pnumber =
    let toNumber (literal: NumberLiteral) =
        let text = literal.String
        // Checked in the same order as the flags are tested: hex, binary, integer.
        match literal.IsHexadecimal, literal.IsBinary, literal.IsInteger with
        | true, _, _ -> Hexadecimal (int text)
        | false, true, _ -> Binary (int text)
        | false, false, true -> Numeral (int text)
        | false, false, false -> Decimal (float text)
    numberLiteral numberFormat "number" |>> toNumber
但是,我试图解析的语言有点奇怪。数字可以是整数(numeral,非负 int)、十进制小数(decimal,非负 float)、十六进制数(带前缀 #x)或二进制数(带前缀 #b):
numeral: 0, 2
decimal: 0.2, 2.0
hexadecimal: #xA04, #x611ff
binary: #b100, #b001
现在我必须解析两次:先(在必要时)把 # 替换为 0,然后才能使用 pnumber:
/// Parses a number in the target language's notation (`2`, `2.0`, `#xA04`,
/// `#b100`), optionally preceded by whitespace, by rewriting the literal into
/// the `0x…` / `0b…` form understood by `pnumber` and parsing that text again.
/// A literal that `pnumber` rejects is reported as an ordinary parser error.
let number: Parser<_, unit> =
    let isDotOrDigit c = isDigit c || c = '.'
    // One leading digit followed by any mix of digits and dots.
    let numOrDec = many1Satisfy2 isDigit isDotOrDigit
    // "#x1F" -> "0x1F", "#b101" -> "0b101"
    let hexOrBin = skipChar '#' >>. manyChars (letter <|> digit) |>> sprintf "0%s"
    // The parentheses matter: `>>.` and `<|>` have the same precedence and are
    // left-associative, so without them consumed leading whitespace would stop
    // `<|>` from ever trying `hexOrBin`.
    let str = spaces >>. (numOrDec <|> hexOrBin)
    str >>= fun s ->
        // `eof` makes sure the whole captured text is a single literal
        // (otherwise "1.2.3" would silently be read as 1.2).
        match run (pnumber .>> eof) s with
        | Success(result, _, _) -> preturn result
        // Surface the problem as a parser error instead of throwing.
        | Failure(errorMsg, _, _) -> fail errorMsg
在这种情况下,有什么更好的解析方法?或者,我该如何改变 FParsec 的 CharStream,使条件解析更容易?
答案 0 :(得分:10)
如果要生成良好的错误消息并正确检查溢出,解析数字可能会非常混乱。
以下是数字解析器的简单FParsec实现:
/// Parses a non-negative integer (`Numeral`) or a number with a fractional
/// part (`Decimal`). A float exponent suffix is not parsed.
/// The conversion raises an exception on overflow.
let numeralOrDecimal : Parser<_, unit> =
    let toNumber (literal: NumberLiteral) =
        match literal.IsInteger with
        | true -> Numeral(int literal.String)
        | false -> Decimal(float literal.String)
    numberLiteral NumberLiteralOptions.AllowFraction "number" |>> toNumber
/// Parses a hexadecimal literal of the form `#x` followed by at least one
/// hex digit. The conversion raises an exception on overflow.
let hexNumber =
    let toHexadecimal (digits: string) =
        Hexadecimal(System.Convert.ToInt32(digits, 16))
    pstring "#x" >>. many1SatisfyL isHex "hex digit" |>> toHexadecimal
/// Parses a binary literal of the form `#b` followed by at least one
/// `0`/`1` digit. The conversion raises an exception on overflow.
let binaryNumber =
    let isBinaryDigit c = c = '0' || c = '1'
    let toBinary (digits: string) =
        Binary(System.Convert.ToInt32(digits, 2))
    pstring "#b" >>. many1SatisfyL isBinaryDigit "binary digit" |>> toBinary
/// Parses any supported number literal: tries the decimal notations first,
/// then `#x…`, then `#b…`. Failures are reported as "number literal".
let number =
    choice [ numeralOrDecimal; hexNumber; binaryNumber ]
    <?> "number literal"
在溢出时生成良好的错误消息会使这个实现稍微复杂一些,因为理想情况下还需要在出错之后回溯,使错误位置落在数字字面量的开头(示例参见 numberLiteral 的文档)。
优雅地处理可能的溢出异常的一种简单方法是使用一个小异常处理组合器,如下所示:
/// Wraps `p` so that a numeric conversion failure thrown while `p` runs
/// (`OverflowException` or `FormatException`) becomes a fatal parser error
/// instead of escaping as an exception. Before the error is returned the
/// stream is backtracked to where `p` started.
/// Any other exception is left to propagate, so unrelated bugs are not
/// disguised as parse errors.
let mayThrow (p: Parser<'t,'u>) : Parser<'t,'u> =
    fun stream ->
        // Snapshot taken before running `p`, used to rewind on failure.
        let state = stream.State
        try
            p stream
        with
        | e when (e :? System.OverflowException || e :? System.FormatException) ->
            stream.BacktrackTo(state)
            Reply(FatalError, messageError e.Message)
然后你可以写
// Usage sketch: `[...]` is a placeholder for the list of alternative parsers
// passed to choiceL; the combined parser is wrapped in mayThrow.
let number = mayThrow (choiceL [...] "number literal")
我不确定你所说的“改变 FParsec 的 CharStream 以使条件解析更容易”是什么意思,但下面的示例演示了如何编写一个仅直接使用 CharStream 方法的底层实现。
// Short alias so the TryParse calls below can write NumberStyles.<flag>.
type NumberStyles = System.Globalization.NumberStyles
// Culture passed to the TryParse calls below.
let invariantCulture = System.Globalization.CultureInfo.InvariantCulture
/// Number parser written directly against the CharStream methods.
/// Accepts `#x<hex digits>`, `#b<binary digits>`, `<digits>` and
/// `<digits>.<digits>` (no exponent) and returns the matching `Number` case.
/// A literal that does not fit the target type yields an error reply after
/// the stream has been moved back to the start of the literal.
let number: Parser<Number, unit> =
    let expectedNumber = expected "number"
    let inline isBinary c = c = '0' || c = '1'
    // Value of an ASCII hex digit: the low nibble is 0-9 for '0'-'9' and 1-6
    // for letters; bit 6 is only set for letters and contributes the extra 9.
    let inline hex2int c = (int c &&& 15) + (int c >>> 6)*9
    let hexStringToInt (str: string) = // does no argument or overflow checking
        let mutable n = 0
        for c in str do
            n <- n*16 + hex2int c
        n
    let binStringToInt (str: string) = // does no argument or overflow checking
        let mutable n = 0
        for c in str do
            n <- n*2 + (int c - int '0')
        n
    // Index of the first character that is not '0' (str.Length if all are '0').
    let findIndexOfFirstNonNull (str: string) =
        let mutable i = 0
        while i < str.Length && str.[i] = '0' do
            i <- i + 1
        i
    let isHexFun = id isHex // tricks the compiler into caching the function object
    let isDigitFun = id isDigit
    let isBinaryFun = id isBinary
    fun stream ->
        // Remember the start of the literal for Seek/ReadFrom below.
        let start = stream.IndexToken
        let cs = stream.Peek2()
        match cs.Char0, cs.Char1 with
        | '#', 'x' ->
            stream.Skip(2)
            let str = stream.ReadCharsOrNewlinesWhile(isHexFun, false)
            if str.Length <> 0 then
                let i = findIndexOfFirstNonNull str
                let length = str.Length - i
                // Fits if there are fewer than 8 significant hex digits, or
                // exactly 8 with a leading digit of at most '7'.
                if length < 8 || (length = 8 && str.[i] <= '7') then
                    Reply(Hexadecimal(hexStringToInt str))
                else
                    stream.Seek(start)
                    Reply(Error, messageError "hex number literal is too large for 32-bit int")
            else
                Reply(Error, expected "hex digit")
        | '#', 'b' ->
            stream.Skip(2)
            let str = stream.ReadCharsOrNewlinesWhile(isBinaryFun, false)
            if str.Length <> 0 then
                let i = findIndexOfFirstNonNull str
                let length = str.Length - i
                // At most 31 significant binary digits are accepted.
                if length < 32 then
                    Reply(Binary(binStringToInt str))
                else
                    stream.Seek(start)
                    Reply(Error, messageError "binary number literal is too large for 32-bit int")
            else
                Reply(Error, expected "binary digit")
        | c, _ ->
            if not (isDigit c) then Reply(Error, expectedNumber)
            else
                stream.SkipCharsOrNewlinesWhile(isDigitFun) |> ignore
                if stream.Skip('.') then
                    let n2 = stream.SkipCharsOrNewlinesWhile(isDigitFun)
                    if n2 <> 0 then
                        // we don't parse any exponent, as in the other example
                        let mutable result = 0.
                        let parsed =
                            System.Double.TryParse(stream.ReadFrom(start),
                                                   NumberStyles.AllowDecimalPoint,
                                                   invariantCulture,
                                                   &result)
                        // Newer .NET runtimes report overflow by returning
                        // infinity instead of failing, so check for both.
                        if parsed && not (System.Double.IsInfinity result) then
                            Reply(Decimal(result))
                        else
                            stream.Seek(start)
                            Reply(Error, messageError "decimal literal is larger than System.Double.MaxValue")
                    else
                        Reply(Error, expected "digit")
                else
                    let decimalString = stream.ReadFrom(start)
                    let mutable result = 0
                    if System.Int32.TryParse(decimalString,
                                             NumberStyles.None,
                                             invariantCulture,
                                             &result)
                    then Reply(Numeral(result))
                    else
                        stream.Seek(start)
                        Reply(Error, messageError "decimal number literal is too large for 32-bit int")
虽然这个实现在没有系统方法帮助的情况下解析十六进制和二进制数,但最终会将十进制数的解析委托给Int32.TryParse和Double.TryParse方法。
正如我所说:它很混乱。