我在python中为我的语言编写了一个tokenizer,但是当我尝试对文件进行标记时,它只会将其标记为一个限制。当它应该标记所有文件时,它只有大约90个令牌(单独的单词和符号)。 这是代码:
import re
file = input("filename>")
with open(file, 'r') as myfile:
data=myfile.read().replace('\n', '')
scanner = re.Scanner([
(r"[0-9]+", lambda scanner,token:("NUMBER", token)),
(r"[a-z_A-Z_λ]+", lambda scanner,token:("KEYWORD", token)),
(r"[,.!#%^*()']+", lambda scanner,token:("OPERATOR", token)),
(r'["]+', lambda scanner,token:("OPERATOR", token)),
(r"[+-]+", lambda scanner,token:("OPERATOR", token)),
(r'[=]+', lambda scanner,token:("OPERATOR", token)),
(r"[{}]+", lambda scanner,token:("OPERATOR", token)),
(r'[[]]+', lambda scanner,token:("OPERATOR", token)),
(r"\s+", None), # None == skip token.
])
results, remainder = scanner.scan(data)
print(results)
示例脚本是(对于那些有时间阅读的人):
constant Flow = "Flow"
constant script = this
local names = {'Gabriel', 'Kauan', 'Laura', 'Tarsila'}
constant flowCountry = 'Brasil'
local void function getinpairs(name) extends findArg()
for _, v(name) in pairs(names) do
private local table = names
print("Flow being the best programming language for you, has implemented some new arguments!")
local flowFounder = names[1]
local namesMetatable = getmetatable(t1)
end
end
function findArg(name)
return getinpairs(name)
end
findArg('Gabriel')
结果是(对于那些有时间阅读的人):
[('KEYWORD', 'constant'), ('KEYWORD', 'Flow'), ('OPERATOR', '='), ('OPERATOR', '"'), ('KEYWORD', 'Flow'), ('OPERATOR', '"'), ('KEYWORD', 'constant'), ('KEYWORD', 'script'), ('OPERATOR', '='), ('KEYWORD', 'thislocal'), ('KEYWORD', 'names'), ('OPERATOR', '='), ('OPERATOR', '{'), ('OPERATOR', "'"), ('KEYWORD', 'Gabriel'), ('OPERATOR', "',"), ('OPERATOR', "'"), ('KEYWORD', 'Kauan'), ('OPERATOR', "',"), ('OPERATOR', "'"), ('KEYWORD', 'Laura'), ('OPERATOR', "',"), ('OPERATOR', "'"), ('KEYWORD', 'Tarsila'), ('OPERATOR', "'"), ('OPERATOR', '}'), ('KEYWORD', 'constant'), ('KEYWORD', 'flowCountry'), ('OPERATOR', '='), ('OPERATOR', "'"), ('KEYWORD', 'Brasil'), ('OPERATOR', "'"), ('KEYWORD', 'local'), ('KEYWORD', 'void'), ('KEYWORD', 'function'), ('KEYWORD', 'getinpairs'), ('OPERATOR', '('), ('KEYWORD', 'name'), ('OPERATOR', ')'), ('KEYWORD', 'extends'), ('KEYWORD', 'findArg'), ('OPERATOR', '()'), ('KEYWORD', 'for'), ('KEYWORD', '_'), ('OPERATOR', ','), ('KEYWORD', 'v'), ('OPERATOR', '('), ('KEYWORD', 'name'), ('OPERATOR', ')'), ('KEYWORD', 'in'), ('KEYWORD', 'pairs'), ('OPERATOR', '('), ('KEYWORD', 'names'), ('OPERATOR', ')'), ('KEYWORD', 'do'), ('KEYWORD', 'private'), ('KEYWORD', 'local'), ('KEYWORD', 'table'), ('OPERATOR', '='), ('KEYWORD', 'names'), ('KEYWORD', 'print'), ('OPERATOR', '('), ('OPERATOR', '"'), ('KEYWORD', 'Flow'), ('KEYWORD', 'being'), ('KEYWORD', 'the'), ('KEYWORD', 'best'), ('KEYWORD', 'programming'), ('KEYWORD', 'language'), ('KEYWORD', 'for'), ('KEYWORD', 'you'), ('OPERATOR', ','), ('KEYWORD', 'has'), ('KEYWORD', 'implemented'), ('KEYWORD', 'some'), ('KEYWORD', 'new'), ('KEYWORD', 'arguments'), ('OPERATOR', '!'), ('OPERATOR', '"'), ('OPERATOR', ')'), ('KEYWORD', 'local'), ('KEYWORD', 'flowFounder'), ('OPERATOR', '='), ('KEYWORD', 'names')]
它停在第11行:word" name"。
有人可以在脚本上指出错误吗?
答案 0 :(得分:0)
您需要转义括号:
r'[[]]+'
应该是
r'[\[\]]+'