如何用 Java 编写一个带令牌验证功能的简单 Java 词法分析器?

时间:2017-11-14 07:56:45

标签: java lexical-analysis

我有一个学校项目,用java代码编写一个简单的词法分析器(尽管对我来说并不是很简单)。

  

输入为java代码字符串或包含java代码的文件。例如:

import java.io.*;
import 25.io.*;
public class ABC {
    public static int a = 25;
    public String a = 25.a;
    void F(int a, String b){
        System.out.println("Hello World");
    }
}
  

程序必须识别出每个令牌,并指出该令牌是否有效。输出格式:

Token <tab> Description/Error <tab> Line#
  

例如(它应该告诉25.io和25.a是无效的令牌):

import    keyword    1
java      identifier 1
.
..
...
;         semi-colon 1
.
..
...
25        number     2
.         dot        2
io        invalid    2
.
..
...
25        number     5
.         dot        5
a         invalid    5
  

我找到了一种方法(使用正则表达式)把代码切分成令牌,但不知道如何验证这些令牌是否有效。

这是我的代码:

  

CodeTokenizer类

/**
 * Swing window that hosts a text editor and a "Tokenize" button.
 *
 * <p>Pressing the button strips comments from the editor text, splits the
 * text into lines, extracts tokens from every line with a regular
 * expression and asks {@code ParseCheck} for a description of each token.
 * Progress and (when {@code isDebugEnabled} is set) intermediate results
 * are printed to standard output.
 */
public class CodeTokenizer extends JFrame {
    /**
     * Matches, in this order: a string literal, a character literal, a block
     * comment, a line comment. Literals are matched first (and later kept
     * untouched) so that comment markers inside them, e.g. "http://x", are
     * not mistaken for comments.
     */
    private static final Pattern COMMENT_OR_LITERAL = Pattern.compile(
            "\"(?:\\\\.|[^\"\\\\\\r\\n])*\""              //String literal (single line)
            + "|'(?:\\\\.|[^'\\\\\\r\\n])*'"              //Character literal (single line)
            + "|/\\*(?:[^*]|(?:\\*+[^*/]))*\\*+/"         //Block comment
            + "|//.*");                                   //Line comment

    /** Token grammar; compiled once because it does not depend on the line being scanned. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile(
            "\"(\\\\[tbnrf01234567'\"\\\\]|[^\"\\\\])*\"" //Match String Literal
            + "|"
            + "'.'" //Match Character Literal
            + "|"
            + "[a-zA-Z_$][a-zA-Z\\d_$]*" //Identifier Matching
            + "|"
            + "\\d+\\.\\d+|[\\-\\d]\\d*\\.\\d+" //Negative & Positive Real Number Matching
            + "|"
            + "\\d+|[\\-\\d]\\d*" //Negative & Positive Integer Matching
            + "|"
            + "\\|\\||\\&\\&|\\+\\+|\\-\\-|[\\+\\-\\*\\/\\!\\=\\>\\<]\\=" //All double symbols Matching
            + "|"
            + "[\\=\\!\\+\\-\\*\\/\\%\\<\\>\\(\\)\\{\\}\\[\\]\\.\\,\\;\\:\\'\\\"\\`\\~\\@\\#\\^\\&\\|\\?\\\\\\_]"); //All Single Symbols Matching

    private JPanel topMenuHolder;
    private JMenuBar topMenu;
    private JMenu file, edit, about;
    private JMenuItem open, save, exit;
    private JMenuItem copy, cut, paste;
    private JMenuItem aboutDeveloper;
    private JEditorPane editor;
    private JButton tokenize;

    // File most recently chosen in the open or save dialog.
    private File targetFile;

    /** Builds the menu bar, editor and button, shows the window and wires the listeners. */
    public CodeTokenizer() {
        //Top Menu Holder
        topMenuHolder = new JPanel(new BorderLayout());

        //Init Menu Bar
        topMenu = new JMenuBar();

        //Menus
        // Swing does not interpret '&' in labels (it would be shown literally),
        // so mnemonics are set explicitly instead.
        file = new JMenu("File");
        file.setMnemonic('F');
        edit = new JMenu("Edit");
        edit.setMnemonic('E');
        about = new JMenu("About");
        about.setMnemonic('A');

        //Menu Items
        open = new JMenuItem("Open");
        open.setMnemonic('O');
        save = new JMenuItem("Save");
        save.setMnemonic('S');
        exit = new JMenuItem("Exit");
        exit.setMnemonic('E');
        copy = new JMenuItem("Copy");
        copy.setMnemonic('C');
        cut = new JMenuItem("Cut");
        cut.setMnemonic('T'); // 'C' is already taken by Copy in the same menu
        paste = new JMenuItem("Paste");
        paste.setMnemonic('P');
        aboutDeveloper = new JMenuItem("About Developer");
        aboutDeveloper.setMnemonic('A');

        //Adding Menus
        topMenuHolder.add(topMenu, BorderLayout.CENTER);
        topMenu.add(file);
        topMenu.add(edit);
        topMenu.add(about);
        file.add(open);
        file.add(save);
        file.addSeparator();
        file.add(exit);
        edit.add(cut);
        edit.add(copy);
        edit.add(paste);
        about.add(aboutDeveloper);

        //Editor
        editor = new JEditorPane();

        //Tokenize Button
        tokenize = new JButton("Tokenize");

        //Adding to View
        this.add(topMenuHolder, BorderLayout.NORTH);
        this.add(new JScrollPane(editor), BorderLayout.CENTER);
        this.add(tokenize, BorderLayout.SOUTH);

        //Adding to Parent
        this.setSize(640,480);
        this.setTitle("Code Tokenizer");
        this.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        this.setVisible(true);

        //ActionListeners
        open.addActionListener(new OnClickOpen());
        save.addActionListener(new OnClickSave());
        exit.addActionListener(e -> System.exit(0));

        cut.addActionListener(e -> editor.cut());
        copy.addActionListener(e -> editor.copy());
        paste.addActionListener(e -> editor.paste());

        aboutDeveloper.addActionListener(new OnClickAboutDeveloper());

        tokenize.addActionListener(new OnClickTokenize());
    }

    /** Lets the user pick a .java file and loads it into the editor. */
    private class OnClickOpen implements ActionListener{
        @Override
        public void actionPerformed(ActionEvent event) {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.setDialogTitle("Open Java Code File");
            fileChooser.setAcceptAllFileFilterUsed(false);
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Java Code Files", "java"));
            int result = fileChooser.showOpenDialog(CodeTokenizer.this);
            if(result == JFileChooser.APPROVE_OPTION){
                if((targetFile = fileChooser.getSelectedFile()) != null){
                    try {
                        editor.setPage(targetFile.toURI().toURL());
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

    /** Lets the user pick a destination and writes the editor text to it, appending ".java" if missing. */
    private class OnClickSave implements ActionListener{
        @Override
        public void actionPerformed(ActionEvent event) {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.setDialogTitle("Save Java Code File");
            fileChooser.setAcceptAllFileFilterUsed(false);
            // Extensions are given without the leading dot; ".java" would match no file.
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Java Code Files", "java"));
            int result = fileChooser.showSaveDialog(CodeTokenizer.this);
            if(result == JFileChooser.APPROVE_OPTION) {
                if((targetFile = fileChooser.getSelectedFile()) != null) {
                    if(!targetFile.getName().endsWith(".java")){
                        try {
                            targetFile = new File(targetFile.getCanonicalPath()+".java");
                        } catch (IOException e) {
                            e.printStackTrace();
                            return;
                        }
                    }
                    // try-with-resources closes (and thereby flushes) the writer even if print fails.
                    try (PrintWriter writer = new PrintWriter(targetFile)) {
                        writer.print(editor.getText());
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

    /** Opens the "About Developer" dialog. */
    private class OnClickAboutDeveloper implements ActionListener{
        @Override
        public void actionPerformed(ActionEvent e) {
            Dialog dialog = new AboutDeveloper();
            dialog.pack();
            dialog.setVisible(true);
        }
    }

    /**
     * Runs the tokenizing pipeline on the current editor text:
     * remove comments, split into lines, extract tokens, describe tokens.
     */
    private class OnClickTokenize implements ActionListener{
        private String text;
        private String []lines;
        private ArrayList<Token> tokens;

        @Override
        public void actionPerformed(ActionEvent e) {
            removeComments();
            extractLines();
            extractTokens();
            describeTokens();
        }

        /**
         * Copies the editor text into {@code text} with comments removed.
         * Line breaks inside block comments are kept so that the line numbers
         * of the following tokens still match the original source, and string
         * and character literals are copied through unchanged.
         */
        private void removeComments(){
            Matcher matcher = COMMENT_OR_LITERAL.matcher(editor.getText());
            StringBuffer stripped = new StringBuffer();
            while(matcher.find()){
                String match = matcher.group();
                // Only comments start with '/'; literals start with a quote.
                String replacement = match.startsWith("/")
                        ? match.replaceAll("[^\\r\\n]", "")
                        : match;
                matcher.appendReplacement(stripped, Matcher.quoteReplacement(replacement));
            }
            matcher.appendTail(stripped);
            text = stripped.toString();
            System.out.println("[INFO]: COMMENTS REMOVED.");

            if(isDebugEnabled){
                System.out.println();
                System.out.println();
                System.out.println("Text After Removing Comments:");
                System.out.println();
                System.out.println(text);
                System.out.println();
                System.out.println();
            }
        }

        /** Splits {@code text} into {@code lines} on CRLF, LF or CR line endings. */
        private void extractLines() {
            System.out.println("[INFO]: EXTRACTING LINES...");
            try {
                // The text may use any of the three line-ending styles, so
                // splitting on "\r\n" alone can leave everything on one line.
                lines = text.split("\\r?\\n|\\r");
                System.out.println("[INFO]: LINES EXTRACTED.");
                if(isDebugEnabled){
                    System.out.println();
                    System.out.println();
                    System.out.println("Lines:");
                    System.out.println();
                    for(int i=0; i<lines.length; i++){
                        System.out.println("["+(i+1)+"]: "+lines[i]);
                    }
                    System.out.println();
                    System.out.println();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /** Scans every line with {@code TOKEN_PATTERN} and records each match with its 1-based line number. */
        private void extractTokens(){
            System.out.println("[INFO]: EXTRACTING TOKENS...");
            try{
                tokens = new ArrayList<>();
                for(int i=0; i<lines.length; i++){
                    Matcher m = TOKEN_PATTERN.matcher(lines[i]);
                    while(m.find()){
                        // Line numbers are reported 1-based, like editors and compilers do.
                        tokens.add(new Token(i + 1, m.group()));
                    }
                }
                System.out.println("[INFO]: TOKENS EXTRACTED.");
                if(isDebugEnabled){
                    System.out.println();
                    System.out.println();
                    System.out.println("Tokens:");
                    System.out.println();
                    for(int i=0; i<tokens.size(); i++){
                        System.out.println(new StringBuilder().append("[Line: ").append(tokens.get(i).getLine()).append("]: ").append(tokens.get(i).getToken()));
                    }
                    System.out.println();
                    System.out.println();
                }
            }
            catch (Exception e){
                e.printStackTrace();
            }
        }

        /** Stores the description returned by {@code ParseCheck.getDescription} on every token. */
        private void describeTokens(){
            System.out.println("[INFO]: DESCRIBING TOKENS...");
            try{
                for(Token token : tokens){
                    token.setDescription(ParseCheck.getDescription(token.getToken()));
                }
                System.out.println("[INFO]: TOKENS DESCRIBED.");
                if(isDebugEnabled){
                    System.out.println();
                    System.out.println();
                    System.out.println("Token Descriptions:");
                    System.out.println();
                    for(Token token : tokens){
                        System.out.println(new StringBuilder().append("[Line: ").append(token.getLine()).append("]: Token = ").append(token.getToken()).append("\tDescription: ").append(token.getDescription()));
                    }
                }
            }
            catch (Exception e){
                e.printStackTrace();
            }
        }


    }

    // When true, every pipeline stage also dumps its intermediate result to standard output.
    private static boolean isDebugEnabled = true;

    /** Creates and shows the tokenizer window. */
    public static void main(String []args){
        new CodeTokenizer();
    }
}
  

ParseCheck类

/**
 * Static helpers that classify a single, already extracted token string
 * (keyword, identifier, operator, symbol, literal or number) and produce a
 * human-readable description for it.
 */
public class ParseCheck {
    // Must stay sorted in natural String order: isKeyWord uses Arrays.binarySearch.
    private static final String keywords[] = {"abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class", "const", "continue", "default", "do", "double", "else", "enum", "extends", "false", "final", "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", "int", "interface", "long", "native", "new", "null", "package", "private", "protected", "public", "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this", "throw", "throws", "transient", "true", "try", "void", "volatile", "while"};

    /**
     * @param str token text
     * @return true if {@code str} is one of the entries of the keyword table
     */
    public static boolean isKeyWord(String str){
        // binarySearch returns the index when found (0 for the first entry)
        // and a negative value otherwise, so the test must be ">= 0".
        return Arrays.binarySearch(keywords, str) >= 0;
    }

    /**
     * @param str token text
     * @return true if {@code str} starts with a letter, '_' or '$' and
     *         continues with letters, digits, '_' or '$'
     */
    public static boolean isIdentifier(String str){
        // Same character set as the tokenizer's identifier rule; matches()
        // already anchors both ends, so no word-boundary assertions are needed.
        return str.matches("[A-Za-z_$][A-Za-z0-9_$]*");
    }

    /**
     * @param str token text
     * @return true for "=" and the compound forms "+=", "-=", "*=", "/=", "%="
     */
    public static boolean isAssignmentOperator(String str){
        return str.matches("[+\\-*/%]?=");
    }

    /**
     * @param str token text
     * @return true for "++", "--" and "!"
     */
    public static boolean isUnaryOperator(String str){
        return str.matches("\\+\\+|\\-\\-|\\!");
    }

    /**
     * @param str token text
     * @return true for one of "+", "-", "*", "/", "%"
     */
    public static boolean isArithmeticOperator(String str){
        return str.matches("[\\+\\-\\*/%]");
    }

    /**
     * @param str token text
     * @return true for "<", ">", "<=", ">=", "==" and "!="
     */
    public static boolean isRelationalOperator(String str){
        return str.matches("[<>=!][=]|[<>]");
    }

    /**
     * @param str token text
     * @return true for any two-character combination of '&' and '|'
     */
    public static boolean isConditionalOperator(String str){
        return str.matches("[\\&\\|]{2}");
    }

    /**
     * @param str token text
     * @return true for a single bracket, brace, parenthesis, '.', ',', ';',
     *         ':', single quote or double quote
     */
    public static boolean isSymbol(String str){
        return str.matches("[\\(\\)\\{\\}\\[\\]\\.\\,\\;\\:\\'\\\"]");
    }

    /**
     * @param str token text
     * @return true if {@code str} is a double-quoted string whose backslashes
     *         only introduce the escapes t, b, n, r, f, 0-7, quote or backslash
     */
    public static boolean isStringLiteral(String str){
        return str.matches("\\\"(\\\\[tbnrf01234567'\"\\\\]|[^\"\\\\])*\\\"");
    }

    /**
     * @param str token text
     * @return true if {@code str} is a single-quoted single character or one
     *         of the escapes accepted by {@link #isStringLiteral(String)}
     */
    public static boolean isCharacterLiteral(String str){
        // The dot must not be escaped: "\\." would accept only the literal '.'.
        return str.matches("'(\\\\[tbnrf01234567'\"\\\\]|[^'\\\\])'");
    }

    /**
     * @param str token text
     * @return true for digits, a dot and digits, optionally preceded by '-'
     */
    public static boolean isRealNumber(String str){
        return str.matches("[\\-\\d]\\d+\\.\\d+|\\d+\\.\\d+");
    }

    /**
     * @param str token text
     * @return true for a run of digits, optionally preceded by '-'
     */
    public static boolean isInteger(String str){
        return str.matches("[\\-]\\d+|\\d+");
    }

    /**
     * Describes a token by trying the classifiers in a fixed order
     * (keyword, identifier, operators, symbols, literals, numbers).
     *
     * @param token token text
     * @return the description of the first matching category, or
     *         "Invalid Token" when no classifier accepts the text
     */
    public static String getDescription(String token){
        if(isKeyWord(token)) return "Keyword";
        if(isIdentifier(token)) return "Identifier";
        if(isAssignmentOperator(token)) return "Assignment Operator";
        if(isUnaryOperator(token)){
            switch (token){
                case "++":
                    return "Increment Operator";
                case "--":
                    return "Decrement Operator";
                default:
                    return "Other Unary Operator";
            }
        }
        if(isArithmeticOperator(token)){
            switch (token){
                case "+":
                    return "Plus Operator";
                case "-":
                    return "Minus Operator";
                case "*":
                    return "Multiply Operator";
                case "/":
                    return "Divide Operator";
                case "%":
                    return "Mod Operator";
                default:
                    return "Other Arithmatic Operator";
            }
        }
        if(isRelationalOperator(token)){
            switch (token){
                case "<":
                    return "Less Than Operator";
                case "<=":
                    return "Less Than Or Equal To Operator";
                case ">":
                    return "Greater Than Operator";
                case ">=":
                    return "Greater Than Or Equal Operator";
                case "==":
                    return "Equal To Operator";
                case "!=":
                    return "Not Equal To Operator";
                default:
                    return "Other Relational Operator";
            }
        }
        if(isConditionalOperator(token)){
            switch (token){
                case "&&":
                    return "AND Operator";
                case "||":
                    return "OR Operator";
                default:
                    return "Other Conditional Operator";
            }
        }
        if(isSymbol(token)){
            switch (token){
                case "(":
                    return "Parenthesis Start";
                case ")":
                    return "Parenthesis End";
                case "{":
                    return "Delimiter Start";
                case "}":
                    return "Delimiter End";
                case "[":
                    return "Subscript Start";
                case "]":
                    return "Subscript End";
                case ".":
                    return "Dot Operator";
                case ",":
                    return "Comma Operator";
                case ";":
                    return "Semicolon";
                case ":":
                    return "Colon";
                case "'":
                    return "Single Quotation";
                case "\"":
                    return "Double Quotation";
            }
        }
        if(isStringLiteral(token)) return "String Literal";
        if(isCharacterLiteral(token)) return "Character Literal";
        if(isRealNumber(token)) return "Real Number";
        if(isInteger(token)) return "Integer";
        return "Invalid Token";
    }
}
  

令牌类

/**
 * A piece of text found by the tokenizer, together with the line it was
 * found on, an optional description and a validity flag.
 */
public class Token {
    /** Line value supplied at construction time. */
    private final int line;
    /** Raw token text supplied at construction time. */
    private final String token;
    /** Description; {@code null} until {@link #setDescription(String)} is called. */
    private String description;
    /** Validity flag; a new token starts out as not valid. */
    private boolean valid;

    /**
     * Creates a token that has no description yet and is marked as not valid.
     *
     * @param line  line value to associate with the token
     * @param token raw token text
     */
    public Token(int line, String token) {
        this.token = token;
        this.line = line;
    }

    /** @return the raw token text */
    public String getToken() {
        return token;
    }

    /** @return the line value given to the constructor */
    public int getLine() {
        return line;
    }

    /** @return the current description, or {@code null} if none was set */
    public String getDescription() {
        return description;
    }

    /** @param description new description for this token */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the current validity flag */
    public boolean isValid() {
        return valid;
    }

    /** @param valid new value of the validity flag */
    public void setValid(boolean valid) {
        this.valid = valid;
    }
}
  

注意:我不能使用任何现成的库,因为学校项目不允许借助内置库或第三方库来对 Java 代码进行标记化/解析。

0 个答案:

没有答案