我已经asked a question about how to parse the arrow type,这不是重复的,而是对基于缩进的语法的改编。
实际上,我希望能够分析一种类似于ML系列语言的语法。我还在Haskell中介绍了函数的类型签名的语法,因此:
myFunction :: atype
我的解析器对于所有类型的签名类型都可以很好地工作,除了箭头类型是“单独”时是这样:
foo :: a // ok
foo :: [a] // ok
foo :: (a, a) // ok
foo :: [a -> a] // ok
foo :: (a -> a, a) // ok
foo :: a -> a // error
与创建函数相同(为简单起见,我只是希望数字作为值):
foo: a = 0 // ok
foo: [a] = 0 // ok
foo: (a, a) = 0 // ok
foo: [a -> a] = 0 // ok
foo: (a -> a, a) = 0 // ok
foo: a -> a = 0 // error
没有缩进,所有这些情况都是先验的。
我尝试了除FParsec Wiki之外的其他模块来分析缩进,只是尝试进行一些评估。 It comes from there,以下是该问题的必要和充分的模块代码:
module IndentParser =
type Indentation =
| Fail
| Any
| Greater of Position
| Exact of Position
| AtLeast of Position
| StartIndent of Position
with
member this.Position = match this with
| Any | Fail -> None
| Greater p -> Some p
| Exact p -> Some p
| AtLeast p -> Some p
| StartIndent p -> Some p
type IndentState<'T> = { Indent : Indentation; UserState : 'T }
type CharStream<'T> = FParsec.CharStream<IndentState<'T>>
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = {Indent = Any; UserState = u}
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path = runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
let getIndentation : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {Indent = i} -> Reply i
let getUserState : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {UserState = u} -> Reply u
let putIndentation newi : IndentParser<unit, _> =
fun stream ->
stream.UserState <- {stream.UserState with Indent = newi}
Reply(Unchecked.defaultof<unit>)
let failf fmt = fail << sprintf fmt
let acceptable i (pos : Position) =
match i with
| Any _ -> true
| Fail -> false
| Greater bp -> bp.Column < pos.Column
| Exact ep -> ep.Column = pos.Column
| AtLeast ap -> ap.Column <= pos.Column
| StartIndent _ -> true
let tokeniser p = parse {
let! pos = getPosition
let! i = getIndentation
if acceptable i pos then return! p
else return! failf "incorrect indentation at %A" pos
}
let indented<'a,'u> i (p : Parser<'a,_>) : IndentParser<_, 'u> = parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same line as the defined position (`pos`).
let exact<'a,'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is further away than the defined position (`pos`).
let greater<'a,'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same OR line further than the defined position (`pos`).
let atLeast<'a,'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
/// Simply check if the parser (`p`) exists, regardless of its position in the text to be analyzed.
let any<'a,'u> pos p: IndentParser<'a, 'u> = indented Any p
let newline<'u> : IndentParser<unit, 'u> = many (skipAnyOf " \t" <?> "whitespace") >>. newline |>> ignore
let rec blockOf p = parse {
do! spaces
let! pos = getPosition
let! x = exact pos p
let! xs = attempt (exact pos <| blockOf p) <|> preturn []
return x::xs
}
现在,这是我要解决的代码:
module Parser =
open IndentParser
type Identifier = string
type Type =
| Typename of Identifier
| Tuple of Type list
| List of Type
| Arrow of Type * Type
| Infered
type Expression =
| Let of Identifier * Type * int
| Signature of Identifier * Type
type Program = Program of Expression list
// Utils -----------------------------------------------------------------
let private ws = spaces
/// All symbols granted for the "opws" parser
let private allowedSymbols =
['!'; '@'; '#'; '$'; '%'; '+'; '&'; '*'; '('; ')'; '-'; '+'; '='; '?'; '/'; '>'; '<'; '|']
/// Parse an operator and white spaces around it: `ws >>. p .>> ws`
let inline private opws str =
ws >>.
(tokeniser (pstring str >>?
(nextCharSatisfiesNot
(isAnyOf (allowedSymbols @ ['"'; '''])) <?> str))) .>> ws
let private identifier =
(many1Satisfy2L isLetter
(fun c -> isLetter c || isDigit c) "identifier")
// Types -----------------------------------------------------------------
let rec typename = parse {
let! name = ws >>. identifier
return Type.Typename name
}
and tuple_type = parse {
let! types = between (opws "(") (opws ")") (sepBy (ws >>. type') (opws ","))
return Type.Tuple types
}
and list_type = parse {
let! ty = between (opws "[") (opws "]") type'
return Type.List ty
}
and arrow_type =
chainr1 (typename <|> tuple_type <|> list_type) (opws "->" >>% fun t1 t2 -> Arrow(t1, t2))
and type' =
attempt arrow_type <|>
attempt typename <|>
attempt tuple_type <|>
attempt list_type
// Expressions -----------------------------------------------------------------
let rec private let' = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws ":")
let! ty = greater pos type'
do! greater pos (opws "=")
let! value = greater pos pint32
return Expression.Let(id, ty, value)
}
and private signature = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws "::")
let! ty = greater pos type'
return Expression.Signature(id, ty)
}
and private expression =
attempt let'
and private expressions = blockOf expression <?> "expressions"
let private document = ws >>. expressions .>> ws .>> eof |>> Program
let private testType = ws >>. type' .>> ws .>> eof
let rec parse code =
runParser document () code
|> printfn "%A"
open Parser
parse @"
foo :: a -> a
"
这是获得的错误消息:
错误消息中没有引用缩进,这也是麻烦所在,因为如果我实现一个相同的解析器,除了缩进解析,它就可以工作。
你能让我走对路吗?
编辑
这是“固定”代码(缺少功能签名分析器+删除了不必要的attempt
):
open FParsec
// module IndentParser
module Parser =
open IndentParser
type Identifier = string
type Type =
| Typename of Identifier
| Tuple of Type list
| List of Type
| Arrow of Type * Type
| Infered
type Expression =
| Let of Identifier * Type * int
| Signature of Identifier * Type
type Program = Program of Expression list
// Utils -----------------------------------------------------------------
let private ws = spaces
/// All symbols granted for the "opws" parser
let private allowedSymbols =
['!'; '@'; '#'; '$'; '%'; '+'; '&'; '*'; '('; ')'; '-'; '+'; '='; '?'; '/'; '>'; '<'; '|']
/// Parse an operator and white spaces around it: `ws >>. p .>> ws`
let inline private opws str =
ws >>.
(tokeniser (pstring str >>?
(nextCharSatisfiesNot
(isAnyOf (allowedSymbols @ ['"'; '''])) <?> str))) .>> ws
let private identifier =
(many1Satisfy2L isLetter
(fun c -> isLetter c || isDigit c) "identifier")
// Types -----------------------------------------------------------------
let rec typename = parse {
let! name = ws >>. identifier
return Type.Typename name
}
and tuple_type = parse {
let! types = between (opws "(") (opws ")") (sepBy (ws >>. type') (opws ","))
return Type.Tuple types
}
and list_type = parse {
let! ty = between (opws "[") (opws "]") type'
return Type.List ty
}
and arrow_type =
chainr1 (typename <|> tuple_type <|> list_type) (opws "->" >>% fun t1 t2 -> Arrow(t1, t2))
and type' =
attempt arrow_type <|>
typename <|>
tuple_type <|>
list_type
// Expressions -----------------------------------------------------------------
let rec private let' = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws ":")
let! ty = greater pos type'
do! greater pos (opws "=")
let! value = greater pos pint32
return Expression.Let(id, ty, value)
}
and private signature = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws "::")
let! ty = greater pos type'
return Expression.Signature(id, ty)
}
and private expression =
attempt let' <|>
signature
and private expressions = blockOf expression <?> "expressions"
let private document = ws >>. expressions .>> ws .>> eof |>> Program
let private testType = ws >>. type' .>> ws .>> eof
let rec parse code =
runParser document () code
|> printfn "%A"
open Parser
System.Console.Clear()
parse @"
foo :: a -> a
"
因此,这是新的错误消息:
答案 0 :(得分:1)
目前,您的代码在::
签名上失败,因为您实际上没有在任何地方使用signature
解析器。您已将expression
定义为attempt let'
,但我认为您打算写attempt signature <|> attempt let'
。这就是为什么您的测试在::
的第二个冒号上失败的原因,因为它与let'
的单个冒号匹配,然后又不期望第二个冒号。
此外,我认为将attempt
这样的多个attempt a <|> attempt b <|> attempt c
链接在一起会给您带来麻烦,因此您应该删除最后的attempt
,例如{{1} }。如果在所有可能的选择中都使用attempt a <|> attempt b <|> c
,那么最终将得到一个解析器,该解析器可以通过不解析任何内容而成功,这通常不是您想要的。
更新:我想我已经找到了原因和解决方法。
摘要:在您的attempt
解析器中,将opws
行替换为ws >>.
。
说明:在所有ws >>?
变体中(并且sepBy
是chainr1
变体),FParsec期望分隔符解析器将成功或将不消耗任何输入失败。 (如果分隔符在使用完输入后失败,则FParsec会认为整个sepBy
系列解析器全部失败了。)但是您的sepBy
解析器将使用空格,如果找不到则失败正确的运算符。因此,当您的opws
解析器解析字符串arrow_type
后跟换行符时,第一个a -> a
之后的箭头正确匹配,然后它看到了第二个{{ 1}},然后尝试找到另一个箭头。由于接下来紧跟的是至少一个空格字符(newlines count as whitespace),因此a
解析器最终在消耗一些输入之前才失败。 (它失败是因为在该空格之后是文件的结尾,而不是另一个a
标记)。这使opws "->"
组合器失败,因此->
失败,并且您的chainr1
解析器最终被解析为单一类型arrow_type
。 (此时,箭头出乎意料了。)
通过在a -> a
的定义中使用a
,可以确保如果解析器的第二部分失败,它将在匹配任何空白之前回退到。这样可以确保分隔符解析器在没有匹配输入的情况下失败,并且不提高字符流中的解析位置。因此,>>?
解析器在解析opws
之后成功,您将获得预期的结果。