大家好,我是 Python 初学者。为了学习,我正在为自己设计的语言(类似于 C++)编写一个小程序(词法分析器)。我在用 Python 格式化列表输出时遇到了问题。
问题:以下是示例 token 输出:
['Char Literal', "'", 4]
['Special_Char', '\\', 4]
['Special_Char', '\\', 4]
['Character', '\n', 4]
['Arithmetic', '+', 4]
['Char Literal', "'", 4]
['Identifier', 'a', 5]
['Arithmetic', '+', 5]
['Identifier', 'b', 5]
['INC_DEC', '++', 5]
['RelationOp', '==', 5]
['AssignmentOp', '=', 5]
['RelationOp', '<=', 5]
['AssignmentOp', '=', 5]
['RelationOp', '>=', 5]
['AssignmentOp', '=', 5]
['Logical_NOT', '!', 5]
['Logical_NOT', '!', 5]
['AssignmentOp', '=', 5]
['Left Square Br', '[', 6]
['Right Square Br', ']', 6]
可以看到,类别(class)部分和值(value)部分都被单引号 ' 括了起来;而在特殊字符的情况下(例如值本身就是单引号),则改用双引号 " 括起来。如何格式化列表的输出,去掉这些引号?因为这种格式在解析器中很难读取。
我想要的输出是:
['Arithmetic', '+', 4] => [Arithmetic, +, 4]
另外,当我打印转义序列或特殊字符时,其中的反斜杠会被打印两次。例如:
Single-Backslash \ is Printed As Double-Backslash \\
我的代码
def make_tokens(self):
# Scan the input one character at a time (self.current_Char / self.advance())
# and return the list of tokens found. Most tokens are appended as
# [class_name, lexeme, line_number]; numeric tokens are appended as
# [self.number(), line_number].
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the branches below has to be inferred -- confirm against the original file.
# NOTE(review): many branches read self.text[self.pos + 1] (or + 2 / + 3)
# with no visible bounds check; presumably this raises IndexError near the
# end of the input -- verify.
tokens = []
# NOTE: `check` is not referenced again anywhere in this function.
check = False
while self.current_Char is not None:
# Loop-exit flag used by the comment-skipping and word-scanning branches;
# reset for every pass of the main loop.
flag = False
# Spaces are skipped without producing a token.
if self.current_Char == ' ':
self.advance()
# Newline: bump the line counter and move on.
elif self.current_Char == '\n':
self.lineno += 1
self.advance()
# Current + next character accepted by escape() (defined elsewhere):
# emitted as a single 'Escape Sequence' token, consuming both characters.
elif escape((self.current_Char + self.text[self.pos + 1])):
esc_seq = self.current_Char + self.text[self.pos + 1]
self.advance()
tokens.append(['Escape Sequence', esc_seq, self.lineno])
self.advance()
# Punctuator immediately followed by a digit or letter: delegated to
# self.number(). This token has two fields, not three.
# NOTE(review): the first `in DIGITS` test looks redundant with
# `in DIGITS + ALPHABETS` -- confirm.
elif self.current_Char in PUNCTUATOR and (self.text[self.pos + 1] in DIGITS or self.text[self.pos + 1] in DIGITS + ALPHABETS):
tokens.append([self.number(), self.lineno])
# Single-character tokens: punctuators, brackets, logical NOT.
elif self.current_Char in PUNCTUATOR:
tokens.append(['Punctuator', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in LEFT_PARA:
tokens.append(['Left Parenthesis', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in LEFT_CUR:
tokens.append(['Left Curly Br', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in LEFT_SQR:
tokens.append(['Left Square Br', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in RIGHT_PARA:
tokens.append(['Right Parenthesis', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in RIGHT_CUR:
tokens.append(['Right Curly Br', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in RIGHT_SQR:
tokens.append(['Right Square Br', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in LOGICAL_NOT:
tokens.append(['Logical_NOT', self.current_Char, self.lineno])
self.advance()
# Character-literal delimiter (a member of LIT_CHAR).
elif self.current_Char in LIT_CHAR:
# NOTE(review): `self.text[self.pos + 1] and ...` only checks that the next
# character is truthy; the LIT_CHAR membership test applies to pos + 2 alone.
if self.text[self.pos + 1] and self.text[self.pos + 2] in LIT_CHAR:
self.advance()
tokens.append(['Character', self.current_Char, self.lineno])
self.advance()
# Delimiter, then a two-character escape, then a LIT_CHAR at pos + 3.
elif escape((self.text[self.pos + 1] + self.text[self.pos + 2])) and self.text[self.pos + 3] in LIT_CHAR:
self.advance()
esc_seq = self.current_Char + self.text[self.pos + 1]
tokens.append(['Escape Sequence', esc_seq, self.lineno])
self.advance()
# Otherwise the delimiter itself is emitted as a 'Char Literal' token.
# NOTE(review): with indentation lost it is unclear whether the final
# self.advance() belongs to this else or follows the whole inner chain.
else:
tokens.append(['Char Literal', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in SPC_CHAR:
tokens.append(['Special_Char', self.current_Char, self.lineno])
self.advance()
# Two-character logical operators.
# NOTE(review): these two branches call self.advance() once, while the
# two-character RelationOp / AssignmentOp / INC_DEC branches below call it
# twice -- confirm the second character is not meant to be consumed here too.
elif self.current_Char + self.text[self.pos + 1] in LOGICAL_AND:
tokens.append(['Logical_AND', self.current_Char + self.text[self.pos + 1], self.lineno])
self.advance()
elif self.current_Char + self.text[self.pos + 1] in LOGICAL_OR:
tokens.append(['Logical_OR', self.current_Char + self.text[self.pos + 1], self.lineno])
self.advance()
# Two-character relational operator: both characters are consumed.
elif self.current_Char + self.text[self.pos + 1] in RELATION_OP:
temp_value = ''
temp_value += self.current_Char + self.text[self.pos + 1]
self.advance()
tokens.append(['RelationOp', temp_value, self.lineno])
temp_value = ''
self.advance()
# Single-character assignment operator that does not start a two-character one.
elif self.current_Char in ASSIGNMENT_OP and self.current_Char + self.text[
self.pos + 1] not in ASSIGNMENT_OP:
tokens.append(['AssignmentOp', self.current_Char, self.lineno])
self.advance()
# Two-character assignment operator: both characters are consumed.
elif self.current_Char + self.text[self.pos + 1] in ASSIGNMENT_OP:
temp_value = ''
temp_value += self.current_Char + self.text[self.pos + 1]
self.advance()
tokens.append(['AssignmentOp', temp_value, self.lineno])
temp_value = ''
self.advance()
# Two-character increment/decrement operator.
elif self.current_Char + self.text[self.pos + 1] in INC_DEC:
temp_value = ''
temp_value += self.current_Char + self.text[self.pos + 1]
self.advance()
tokens.append(['INC_DEC', temp_value, self.lineno])
temp_value = ''
self.advance()
# Arithmetic operator not followed by another arithmetic operator.
elif self.current_Char in ARTH and self.text[self.pos + 1] not in ARTH:
tokens.append(['Arithmetic', self.current_Char, self.lineno])
self.advance()
elif self.current_Char in TERMINATOR:
tokens.append(['Terminator', self.current_Char, self.lineno])
self.advance()
# Digit: delegated to self.number(); again a two-field token.
elif self.current_Char in DIGITS:
tokens.append([self.number(), self.lineno])
# '#-' marker: skip characters until the current character is a newline.
# The newline itself is left in place (see the commented-out advance below).
elif self.current_Char == '#' and self.text[self.pos + 1] == '-':
temp_value = ''
# temp_value is empty here, so this just stores the two marker characters.
temp_value += temp_value + self.current_Char + self.text[self.pos + 1]
# NOTE(review): self.pos is bumped directly instead of via self.advance();
# whether self.current_Char is refreshed depends on code not shown -- verify.
self.pos += 2
if temp_value in COMMENTS:
while flag is not True:
if self.current_Char == '\n':
flag = True
temp_value = ''
# self.advance() # Remove this for Sequential Line No
else:
self.advance()
else:
print('Error 404')
# '#/' marker: skip until a '/' followed by '#' is found, counting newlines
# seen along the way.
elif self.current_Char == '#' and self.text[self.pos + 1] == '/':
temp_value = ''
temp_value += temp_value + self.current_Char + self.text[self.pos + 1]
self.pos += 2
if temp_value in COMMENTS:
while flag is not True:
if self.current_Char == '\n':
self.lineno += 1
self.advance()
elif self.current_Char == '/':
if self.text[self.pos + 1] == '#':
self.pos += 2
flag = True
temp_value = ''
# NOTE(review): lineno is incremented once more after the closing marker
# -- confirm this is intended.
self.lineno += 1
self.advance()
else:
self.advance()
else:
self.advance()
else:
print('Error 404')
# Fallback: accumulate characters into a word until the next character is a
# space, a newline, or a member of CHARAC, then classify the word as
# Run_Class, DataType, Keyword, Identifier (via identify()), or Lexical Error.
else:
temp_value = ''
while flag is not True:
temp_value += self.current_Char
if self.text[self.pos + 1] == ' ' or self.text[self.pos + 1] == '\n' or self.text[
self.pos + 1] in CHARAC:
if temp_value in RUN_CLASS:
tokens.append(['Run_Class', temp_value, self.lineno])
temp_value = ''
flag = True
self.advance()
elif temp_value in DATATYPE:
tokens.append(['DataType', temp_value, self.lineno])
temp_value = ''
flag = True
self.advance()
elif temp_value in KEYWORDS:
tokens.append(['Keyword', temp_value, self.lineno])
temp_value = ''
flag = True
self.advance()
elif identify(temp_value):
tokens.append(['Identifier', temp_value, self.lineno])
temp_value = ''
flag = True
self.advance()
else:
tokens.append(['Lexical Error', temp_value, self.lineno])
temp_value = ''
flag = True
self.advance()
# Word not finished yet: keep consuming characters.
else:
self.advance()
return tokens
def _format_token(token):
    """Return *token* rendered as ``[field, field, ...]``.

    Each field is converted with ``str()`` instead of ``repr()``, so string
    fields are shown without surrounding quotes and a single backslash stays
    a single backslash, e.g. ``['Arithmetic', '+', 4]`` becomes
    ``[Arithmetic, +, 4]``.

    NOTE(review): a field that is itself a list (the numeric tokens built
    from ``self.number()`` may be) is still rendered by Python's own list
    formatting, and a field containing a raw newline will span two output
    lines -- confirm whether either case needs special handling.
    """
    return '[' + ', '.join(str(field) for field in token) + ']'


def run():
    """Tokenize the input file, print the tokens, and save them to Tokens.txt.

    Reads the text through ``InputFile(File).data()``, runs
    ``Lexer(text).make_tokens()``, then writes one formatted token per line
    both to standard output and to ``Tokens.txt`` in the current directory.

    The tokens are formatted with ``_format_token`` rather than printed as raw
    lists: printing a list uses ``repr()`` on every element, which is what
    added the quotes and doubled the backslashes in the previous output.
    """
    input_file = InputFile(File)
    text = input_file.data()
    lexer = Lexer(text)
    tokens = lexer.make_tokens()

    # Format once so the console and the file always show identical lines.
    lines = [_format_token(token) for token in tokens]

    print(*lines, sep='\n')
    with open('Tokens.txt', 'w') as lexemes:
        lexemes.writelines(line + '\n' for line in lines)
# Script entry point: the lexer runs as soon as this module is executed.
# NOTE: because the call is unguarded, it also runs when the module is imported.
run()