我正在研究 Google Protocol Buffers(protobuf),并尝试在 Python 中使用 SimpleParse 解析 .proto 文件。
我按照 SimpleParse 的 EBNF 格式编写了语法,解析结果显示成功,但结果树是空的,我不确定哪里出了问题。非常感谢任何帮助。
以下是语法文件:
# SimpleParse EBNF grammar for protobuf .proto files; `proto` is the root production.
# NOTE(review): no production below consumes whitespace, while the sample input
# separates tokens with spaces -- presumably why the parse stops at offset 0; confirm.
# NOTE(review): `proto` is a zero-or-more repetition, so matching nothing at all
# is still reported as a successful parse.
proto ::= ( message / extend / enum / import / package / option / ';' )*
import ::= 'import' , strLit , ';'
package ::= 'package' , ident , ( '.' , ident )* , ';'
option ::= 'option' , optionBody , ';'
optionBody ::= ident , ( '.' , ident )* , '=' , constant
message ::= 'message' , ident , messageBody
extend ::= 'extend' , userType , '{' , ( field / group / ';' )* , '}'
enum ::= 'enum' , ident , '{' , ( option / enumField / ';' )* , '}'
enumField ::= ident , '=' , intLit , ';'
# NOTE(review): `service` (and therefore `rpc`) is not referenced from `proto`,
# so it cannot be reached from the root production.
service ::= 'service' , ident , '{' , ( option / rpc / ';' )* , '}'
rpc ::= 'rpc' , ident , '(' , userType , ')' , 'returns' , '(' , userType , ')' , ';'
# NOTE(review): the other bodies accept ';' as an empty statement; the ':' here
# looks like a typo for ';' -- confirm.
messageBody ::= '{' , ( field / enum / message / extend / extensions / group / option / ':' )* , '}'
group ::= label , 'group' , camelIdent , '=' , intLit , messageBody
field ::= label , type , ident , '=' , intLit , ( '[' , fieldOption , ( ',' , fieldOption )* , ']' )? , ';'
fieldOption ::= optionBody / 'default' , '=' , constant
extensions ::= 'extensions' , extension , ( ',' , extension )* , ';'
extension ::= intLit , ( 'to' , ( intLit / 'max' ) )?
label ::= 'required' / 'optional' / 'repeated'
type ::= 'double' / 'float' / 'int32' / 'int64' / 'uint32' / 'uint64' / 'sint32' / 'sint64' / 'fixed32' / 'fixed64' / 'sfixed32' / 'sfixed64' / 'bool' / 'string' / 'bytes' / userType
userType ::= '.'? , ident , ( '.' , ident )*
constant ::= ident / intLit / floatLit / strLit / boolLit
ident ::= [A-Za-z_],[A-Za-z0-9_]*
camelIdent ::= [A-Z],[\w_]*
# NOTE(review): none of the three alternatives matches a bare "0"
# (decInt starts at 1-9, octInt needs at least one digit after the 0).
intLit ::= decInt / hexInt / octInt
decInt ::= [1-9],[\d]*
hexInt ::= [0],[xX],[A-Fa-f0-9]+
octInt ::= [0],[0-7]+
# NOTE(review): `[\.\d+]?` is one optional character from a class, not
# "a dot followed by digits" -- confirm the intended fraction syntax.
floatLit ::= [\d]+ , [\.\d+]?
boolLit ::= 'true' / 'false'
strLit ::= quote ,( hexEscape / octEscape / charEscape / [^\0\n] )* , quote
# NOTE(review): this class contains only the single-quote character, so
# double-quoted strings would not match -- confirm intent.
quote ::= ['']
hexEscape ::= [\\],[Xx],[A-Fa-f0-9]
octEscape ::= [\\0]? ,[0-7]
charEscape ::= [\\],[abfnrtv\\\?'']
这是我正在尝试的python代码:
from simpleparse.parser import Parser
from pprint import pprint
# Load the SimpleParse grammar and the .proto text, parse the text starting
# from the `proto` production, and dump the parse results.
protoGrammarRoot = "proto"
with open("proto_grammar.ebnf", "r") as grammarFile:
    protoGrammar = grammarFile.read()
with open("sample.proto", "r") as protoFile:
    # Line breaks are removed so the whole file is parsed as one line.
    protoInput = protoFile.read().replace('\n', '')
parser = Parser(protoGrammar, protoGrammarRoot)
success, resultTree, newCharacter = parser.parse(protoInput)
pprint(protoInput)
pprint(success)
pprint(resultTree)
pprint(newCharacter)
# `success` alone does not mean the input was parsed: the root production is a
# zero-or-more repetition, so matching nothing is still a success. The parse
# only covered the whole input if it stopped at the very end of it.
if newCharacter != len(protoInput):
    print("Parse stopped at offset %d of %d; unparsed input starts with %r"
          % (newCharacter, len(protoInput),
             protoInput[newCharacter:newCharacter + 40]))
这是我试图解析的 .proto 文件:
message AmbiguousMsg {
optional string mypack_ambiguous_msg = 1;
optional string mypack_ambiguous_msg1 = 1;
}
我得到输出为
1
[]
0
答案 0 :(得分:0)
我是 Python 新手,不过还是写出了下面这段代码,虽然我不太确定你想要的输出格式。希望这能为你指明正确的方向。你可以随意修改以下代码以满足自己的需求。
#!/usr/bin/python
# (c) 2015 enthusiasticgeek for StackOverflow. Use the code in anyway you want but leave credits intact. Also use this code at your own risk. I do not take any responsibility for your usage - blame games and trolls will strictly *NOT* be tolerated.
import re
#data_types=['string','bool','enum','int32','uint32','int64','uint64','sint32','sint64','bytes','string','fixed32','sfixed32','float','fixed64','sfixed64','double']
#function # 1
#Generate list of units in the brackets
#================ tokens based on braces ====================
def find_balanced_braces(args):
    """Return the text enclosed by each outermost ``{...}`` pair.

    Args:
        args: Iterable of strings to scan. Strings that contain no ``{``
            are skipped.

    Returns:
        A list with one whitespace-stripped string per top-level brace
        group, in order of appearance. Nested braces are kept verbatim
        inside the group that encloses them.
    """
    parts = []
    for arg in args:
        if '{' not in arg:
            continue
        chars = []
        depth = 0
        for c in arg:
            if c == '{':
                # The outermost opener is a delimiter, not content.
                if depth > 0:
                    chars.append(c)
                depth += 1
            elif c == '}':
                if depth == 0:
                    # Unmatched closer: ignore it so the depth never goes
                    # negative and hides the groups that follow.
                    continue
                depth -= 1
                if depth > 0:
                    chars.append(c)
                else:
                    # Closed a top-level group: emit it and start afresh.
                    parts.append(''.join(chars).strip())
                    chars = []
            elif depth > 0:
                chars.append(c)
    return parts
#function # 2
#================ Retrieve Nested Levels ====================
def find_nested_levels(test, count_level):
    """Return ``count_level`` plus the brace-nesting depth found in ``test``.

    Args:
        test: List of strings passed to ``find_balanced_braces``.
        count_level: Depth already counted by the caller (0 for a fresh scan).

    Returns:
        ``count_level`` increased by one for every round in which
        ``find_balanced_braces`` still finds a brace group.
    """
    depth = count_level
    current = test
    while True:
        inner = find_balanced_braces(current)
        if not inner:
            return depth
        # Descend one level: the bodies just extracted become the next input.
        depth += 1
        current = inner
#function # 3
#================ Process Nested Levels ====================
def process_nested_levels(test, count_level):
    """Print the statements found at each brace-nesting level; return the depth.

    For every top-level brace group in ``test`` only the text before the
    first nested ``message`` keyword is printed, one ``;``-separated
    statement per line. The function then recurses into the group bodies.

    Args:
        test: List of strings passed to ``find_balanced_braces``.
        count_level: Depth already counted by the caller (0 for a fresh scan).

    Returns:
        ``count_level`` plus the number of levels that contained a brace group.
    """
    count_level = count_level + 1
    level = find_balanced_braces(test)
    print("===== Level = " + str(count_level) + " =====")
    for body in level:
        # Cut at the `message` keyword only when it is a whole word, so an
        # identifier such as `error_message` does not truncate the field list.
        own_statements = re.split(r'\bmessage\b', body, maxsplit=1)[0]
        for statement in own_statements.split(";"):
            pattern = statement.lstrip()
            print(pattern)
            # Left commented out by the author as a starting point for
            # tokenising `pattern` further with a regular expression:
            #match = "\message \s*(.*?)\s*\{"+pattern
            #match_result = re.findall(match, level[i])
            #print match_result
    print("===== End Level =====")
    if not level:
        return count_level - 1
    return process_nested_levels(level, count_level)
#============================================================
#=================================================================================
# Inline sample with several nesting levels, kept for manual testing.
test_string = ("message a{ optional string level-i1.l1.1 = 1 [default = \"/\"]; "
               "message b{ required bool level-i1.l2.1 = 1; required fixed32 level-i1.l2.1 = 2; "
               "message c{ required string level-i1.l3.1 = 1; } "
               "} "
               "} "
               "message d{ required uint64 level-i2.l1.1 = 1; required double level-i2.l1.2 = 2; "
               "message e{ optional double level-i2.l2.1 = 1; "
               "message f{ optional fixed64 level-i2.l3.1 = 1; required fixed32 level-i2.l3.2 = 2; "
               "message g{ required bool level-i2.l4.1 = 2; } "
               "} "
               "} "
               "} "
               "message h{ required uint64 level-i3.l1.1 = 1; required double level-i3.l1.2 = 2; }")

# The file content is used as read; newlines are deliberately left in place.
with open("fileproto.proto", "r") as myfile:
    data = myfile.read()
print(data)

count_level = 0
# Replace 'data' with 'test_string' in the call below to run on the inline sample.
nested_levels = process_nested_levels([data], count_level)
print("Total count levels depth = " + str(nested_levels))
print("========================\n")
我的输出如下
// This defines protocol for a simple server that lists files.
//
// See also the nanopb-specific options in fileproto.options.
message ListFilesRequest {
optional string path = 1 [default = "/"];
}
message FileInfo {
required uint64 inode = 1;
required string name = 2;
}
message ListFilesResponse {
optional bool path_error = 1 [default = false];
repeated FileInfo file = 2;
}
===== Level = 1 =====
optional string path = 1 [default = "/"]
required uint64 inode = 1
required string name = 2
optional bool path_error = 1 [default = false]
repeated FileInfo file = 2
===== End Level =====
===== Level = 2 =====
===== End Level =====
Total count levels depth = 1
========================
请注意打印 `pattern` 的那一行:在它之后,如果需要,你可以对 `pattern` 做进一步的分词(tokenize)处理。我在代码注释中保留了一个使用正则表达式的示例。