词法分析器令牌格式

时间:2019-12-08 10:40:26

标签: python python-3.x list compiler-construction repr

大家好,我是Python的初学者。为了学习,我为自己的语言(类似于C++)编写了一个小程序。我在使用Python格式化列表的输出时遇到了问题。

问题:以下是示例令牌输出:

['Char Literal', "'", 4]
['Special_Char', '\\', 4]
['Special_Char', '\\', 4]
['Character', '\n', 4]
['Arithmetic', '+', 4]
['Char Literal', "'", 4]
['Identifier', 'a', 5]
['Arithmetic', '+', 5]
['Identifier', 'b', 5]
['INC_DEC', '++', 5]
['RelationOp', '==', 5]
['AssignmentOp', '=', 5]
['RelationOp', '<=', 5]
['AssignmentOp', '=', 5]
['RelationOp', '>=', 5]
['AssignmentOp', '=', 5]
['Logical_NOT', '!', 5]
['Logical_NOT', '!', 5]
['AssignmentOp', '=', 5]
['Left Square Br', '[', 6]
['Right Square Br', ']', 6]

您会看到,类别部分和值部分都被单引号 ' 括了起来;而在特殊字符的情况下,使用的则是双引号 " 。如何格式化列表的输出?因为这样的输出在解析器中很难读取。

我想要的输出是:

['Arithmetic', '+', 4] => [Arithmetic, +, 4]

当我打印转义序列或特殊字符时,它们也会打印两次。例如:

Single-Backslash \ is Printed As Double-Backslash \\

我的代码

    def make_tokens(self) -> list:
        """Tokenize the input and return the collected tokens.

        The method walks the input through ``self.current_Char`` until it
        becomes ``None``, looking ahead with ``self.text[self.pos + k]`` and
        moving forward with ``self.advance()`` (or, in the comment branches,
        by bumping ``self.pos`` directly).  The ``elif`` chain is
        order-sensitive: the first matching branch wins.

        Returns:
            A list of tokens.  Most tokens are three-element lists
            ``[category, lexeme, line_number]``; the two branches that call
            ``self.number()`` append a two-element list
            ``[self.number(), line_number]`` instead.

        NOTE(review): nearly every branch reads ``self.text[self.pos + 1]``
        (some read up to ``+ 3``) without a bounds check -- presumably this
        raises ``IndexError`` near the end of the input if ``self.text`` is a
        plain ``str``; confirm against how ``text`` is prepared by the caller.
        """
        tokens = []

        # NOTE(review): `check` is assigned here but never read in this method.
        check = False
        while self.current_Char is not None:
            # `flag` is the "done" marker for the inner scanning loops
            # (comments and the word-accumulating fallback); reset per token.
            flag = False

            if self.current_Char == ' ':
                # Blanks are skipped without producing a token.
                self.advance()
            elif self.current_Char == '\n':
                # A newline only bumps the line counter.
                self.lineno += 1
                self.advance()
            elif escape((self.current_Char + self.text[self.pos + 1])):
                # Current + next character form a pair accepted by escape()
                # (helper defined elsewhere); emit both characters as one token.
                esc_seq = self.current_Char + self.text[self.pos + 1]
                self.advance()
                tokens.append(['Escape Sequence', esc_seq, self.lineno])
                self.advance()
            elif self.current_Char in PUNCTUATOR and (self.text[self.pos + 1] in DIGITS or self.text[self.pos + 1] in DIGITS + ALPHABETS):
                # Punctuator immediately followed by a digit/letter is handed to
                # self.number(); no advance() here, so number() presumably
                # consumes the input itself -- TODO confirm.
                # NOTE(review): the first `in DIGITS` test looks redundant with
                # the `in DIGITS + ALPHABETS` test that follows it.
                tokens.append([self.number(), self.lineno])
            elif self.current_Char in PUNCTUATOR:
                tokens.append(['Punctuator', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in LEFT_PARA:
                tokens.append(['Left Parenthesis', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in LEFT_CUR:
                tokens.append(['Left Curly Br', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in LEFT_SQR:
                tokens.append(['Left Square Br', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in RIGHT_PARA:
                tokens.append(['Right Parenthesis', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in RIGHT_CUR:
                tokens.append(['Right Curly Br', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in RIGHT_SQR:
                tokens.append(['Right Square Br', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in LOGICAL_NOT:
                tokens.append(['Logical_NOT', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in LIT_CHAR:
                # Character-literal delimiter: look ahead for `'x'` or `'\x'`.
                # NOTE(review): `self.text[self.pos + 1] and ...` only tests the
                # truthiness of the next character; it is not a membership test.
                if self.text[self.pos + 1] and self.text[self.pos + 2] in LIT_CHAR:
                    # Delimiter two positions ahead: emit the enclosed character.
                    self.advance()
                    tokens.append(['Character', self.current_Char, self.lineno])
                    self.advance()
                elif escape((self.text[self.pos + 1] + self.text[self.pos + 2])) and self.text[self.pos + 3] in LIT_CHAR:
                    # Enclosed two-character escape followed by a delimiter.
                    self.advance()
                    esc_seq = self.current_Char + self.text[self.pos + 1]
                    tokens.append(['Escape Sequence', esc_seq, self.lineno])
                    self.advance()
                else:
                    # No recognizable literal body: emit the delimiter itself.
                    tokens.append(['Char Literal', self.current_Char, self.lineno])
                # Runs after every one of the three sub-branches above.
                self.advance()
            elif self.current_Char in SPC_CHAR:
                tokens.append(['Special_Char', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char + self.text[self.pos + 1] in LOGICAL_AND:
                # NOTE(review): the lexeme is two characters but advance() is
                # called once here, whereas the RelationOp/AssignmentOp/INC_DEC
                # branches below call it twice -- confirm this is intended.
                tokens.append(['Logical_AND', self.current_Char + self.text[self.pos + 1], self.lineno])
                self.advance()
            elif self.current_Char + self.text[self.pos + 1] in LOGICAL_OR:
                # Same single advance() as the Logical_AND branch above.
                tokens.append(['Logical_OR', self.current_Char + self.text[self.pos + 1], self.lineno])
                self.advance()
            elif self.current_Char + self.text[self.pos + 1] in RELATION_OP:
                # Two-character relational operator; both characters are consumed.
                temp_value = ''
                temp_value += self.current_Char + self.text[self.pos + 1]
                self.advance()
                tokens.append(['RelationOp', temp_value, self.lineno])
                temp_value = ''
                self.advance()
            elif self.current_Char in ASSIGNMENT_OP and self.current_Char + self.text[
                self.pos + 1] not in ASSIGNMENT_OP:
                # Single-character assignment operator (the two-character
                # combination with the next character is not in ASSIGNMENT_OP).
                tokens.append(['AssignmentOp', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char + self.text[self.pos + 1] in ASSIGNMENT_OP:
                # Two-character (compound) assignment operator.
                temp_value = ''
                temp_value += self.current_Char + self.text[self.pos + 1]
                self.advance()
                tokens.append(['AssignmentOp', temp_value, self.lineno])
                temp_value = ''
                self.advance()
            elif self.current_Char + self.text[self.pos + 1] in INC_DEC:
                # Two-character increment/decrement operator.
                temp_value = ''
                temp_value += self.current_Char + self.text[self.pos + 1]
                self.advance()
                tokens.append(['INC_DEC', temp_value, self.lineno])
                temp_value = ''
                self.advance()
            elif self.current_Char in ARTH and self.text[self.pos + 1] not in ARTH:
                # Arithmetic operator whose next character is not itself in ARTH.
                tokens.append(['Arithmetic', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in TERMINATOR:
                tokens.append(['Terminator', self.current_Char, self.lineno])
                self.advance()
            elif self.current_Char in DIGITS:
                # Numeric literal: delegated to self.number(); as above, the
                # token appended here has two elements, not three.
                tokens.append([self.number(), self.lineno])
            elif self.current_Char == '#' and self.text[self.pos + 1] == '-':
                # Line comment introduced by "#-": skip up to the next newline.
                temp_value = ''
                temp_value += temp_value + self.current_Char + self.text[self.pos + 1]
                # Position is moved directly rather than through advance().
                # NOTE(review): whether current_Char is refreshed by this depends
                # on how current_Char is defined on the class -- confirm.
                self.pos += 2
                if temp_value in COMMENTS:
                    # NOTE(review): the loop only exits on '\n'; there is no
                    # check for current_Char becoming None at end of input.
                    while flag is not True:
                        if self.current_Char == '\n':
                            # Stop ON the newline so the outer loop counts it.
                            flag = True
                            temp_value = ''
                            # self.advance()  # Remove this for Sequential Line No
                        else:
                            self.advance()
                else:
                    print('Error 404')
            elif self.current_Char == '#' and self.text[self.pos + 1] == '/':
                # Block comment introduced by "#/" and closed by "/#".
                temp_value = ''
                temp_value += temp_value + self.current_Char + self.text[self.pos + 1]
                self.pos += 2
                if temp_value in COMMENTS:
                    while flag is not True:
                        if self.current_Char == '\n':
                            # Keep the line counter in step inside the comment.
                            self.lineno += 1
                            self.advance()
                        elif self.current_Char == '/':
                            if self.text[self.pos + 1] == '#':
                                # Closing "/#": skip it, then bump lineno and
                                # advance once more.
                                # NOTE(review): this appears to assume a newline
                                # directly follows the closing marker -- confirm.
                                self.pos += 2
                                flag = True
                                temp_value = ''
                                self.lineno += 1
                                self.advance()
                            else:
                                self.advance()
                        else:
                            self.advance()
                else:
                    print('Error 404')
            else:
                # Fallback: accumulate characters into a word until the NEXT
                # character is a space, a newline, or a member of CHARAC, then
                # classify the word.
                temp_value = ''
                while flag is not True:
                    temp_value += self.current_Char
                    if self.text[self.pos + 1] == ' ' or self.text[self.pos + 1] == '\n' or self.text[
                        self.pos + 1] in CHARAC:
                        # Classification order: RUN_CLASS, DATATYPE, KEYWORDS,
                        # identify() (helper defined elsewhere), else an error token.
                        if temp_value in RUN_CLASS:
                            tokens.append(['Run_Class', temp_value, self.lineno])
                            temp_value = ''
                            flag = True
                            self.advance()
                        elif temp_value in DATATYPE:
                            tokens.append(['DataType', temp_value, self.lineno])
                            temp_value = ''
                            flag = True
                            self.advance()
                        elif temp_value in KEYWORDS:
                            tokens.append(['Keyword', temp_value, self.lineno])
                            temp_value = ''
                            flag = True
                            self.advance()
                        elif identify(temp_value):
                            tokens.append(['Identifier', temp_value, self.lineno])
                            temp_value = ''
                            flag = True
                            self.advance()
                        else:
                            # Unclassifiable word: recorded as a token, not raised.
                            tokens.append(['Lexical Error', temp_value, self.lineno])
                            temp_value = ''
                            flag = True
                            self.advance()
                    else:
                        # Word continues: move on and keep accumulating.
                        self.advance()

        return tokens


def run():
    """Lex the configured input file, echo the tokens, and save them.

    Reads the source text through ``InputFile(File).data()``, tokenizes it
    with ``Lexer(...).make_tokens()``, prints one token per line to stdout,
    and writes the same one-token-per-line listing to ``Tokens.txt``
    (overwriting any existing file).
    """
    source_text = InputFile(File).data()
    tokens = Lexer(source_text).make_tokens()

    # Each token is printed with its default list formatting, one per line.
    print(*tokens, sep='\n')

    with open('Tokens.txt', 'w') as out_file:
        out_file.writelines("%s\n" % token for token in tokens)


# Module-level entry point: the lexer runs as soon as this file is executed
# (or imported), since the call is not behind an `if __name__ == "__main__"` guard.
run()

0 个答案:

没有答案