我有一个学校项目,需要用Java编写一个简单的词法分析器(尽管对我来说并不简单)。
输入是一段Java代码字符串,或者一个包含Java代码的文件。例如:
import java.io.*;
import 25.io.*;
public class ABC {
public static int a = 25;
public String a = 25.a;
void F(int a, String b){
System.out.println("Hello World");
}
}
程序必须识别出各个令牌,并说明这些令牌是否有效。输出格式:
Token <tab> Description/Error <tab> Line#
例如(它应该告诉25.io和25.a是无效的令牌):
import keyword 1
java identifier 1
.
..
...
; semi-colon 1
.
..
...
25 number 2
. dot 2
io invalid 2
.
..
...
25 number 5
. dot 5
a invalid 5
我找到了一种方法(使用正则表达式)把代码切分成令牌,但不知道如何验证这些令牌。
这是我的代码:
CodeTokenizer类
/**
 * Swing window that lets the user load, edit and save Java source text and run
 * a regex-based tokenizer over it.
 *
 * <p>Pressing "Tokenize" strips comments from the editor text, splits the text
 * into lines, extracts tokens line by line and asks {@code ParseCheck} for a
 * description of each token. Progress messages (and, while debugging is
 * enabled, every intermediate result) are printed to standard output.
 */
public class CodeTokenizer extends JFrame {
    /** When {@code true}, each pipeline stage dumps its intermediate result to stdout. */
    private static final boolean DEBUG_ENABLED = true;

    /** Matches a block comment or a line comment. */
    private static final Pattern COMMENT_PATTERN =
            Pattern.compile("(?:/\\*(?:[^*]|(?:\\*+[^*/]))*\\*+/)|(?://.*)");

    /**
     * Matches one token. Compiled once instead of once per line.
     *
     * <p>The two-character operators are tried before the number alternatives:
     * the number alternatives accept a lone '-', so with the opposite order
     * "--" and "-=" would be split into two one-character tokens.
     */
    private static final Pattern TOKEN_PATTERN = Pattern.compile(
            "\"(\\\\[tbnrf01234567'\"\\\\]|[^\"\\\\])*\""       // string literal
            + "|" + "'(\\\\[tbnrf01234567'\"\\\\]|[^'\\\\])'"    // character literal, escapes included
            + "|" + "[a-zA-Z_$][a-zA-Z\\d_$]*"                    // identifier or keyword
            + "|" + "\\|\\||\\&\\&|\\+\\+|\\-\\-|[\\+\\-\\*\\/\\!\\=\\>\\<]\\="  // two-character operators
            + "|" + "\\d+\\.\\d+|[\\-\\d]\\d*\\.\\d+"            // negative and positive real numbers
            + "|" + "\\d+|[\\-\\d]\\d*"                            // negative and positive integers
            + "|" + "[\\=\\!\\+\\-\\*\\/\\%\\<\\>\\(\\)\\{\\}\\[\\]\\.\\,\\;\\:\\'\\\"\\`\\~\\@\\#\\^\\&\\|\\?\\\\\\_]"); // single symbols

    private JPanel topMenuHolder;
    private JMenuBar topMenu;
    private JMenu file, edit, about;
    private JMenuItem open, save, exit;
    private JMenuItem copy, cut, paste;
    private JMenuItem aboutDeveloper;
    private JEditorPane editor;
    private JButton tokenize;
    private File targetFile;

    /** Builds the menu bar, the editor and the "Tokenize" button, wires the listeners and shows the window. */
    public CodeTokenizer() {
        // Top menu holder and menu bar
        topMenuHolder = new JPanel(new BorderLayout());
        topMenu = new JMenuBar();

        // Swing does not interpret '&' in a label, so the mnemonic is set
        // explicitly and the label carries plain text.
        file = withMnemonic(new JMenu("File"), java.awt.event.KeyEvent.VK_F);
        edit = withMnemonic(new JMenu("Edit"), java.awt.event.KeyEvent.VK_E);
        about = withMnemonic(new JMenu("About"), java.awt.event.KeyEvent.VK_A);

        open = withMnemonic(new JMenuItem("Open"), java.awt.event.KeyEvent.VK_O);
        save = withMnemonic(new JMenuItem("Save"), java.awt.event.KeyEvent.VK_S);
        exit = withMnemonic(new JMenuItem("Exit"), java.awt.event.KeyEvent.VK_E);
        copy = withMnemonic(new JMenuItem("Copy"), java.awt.event.KeyEvent.VK_C);
        // 'T' rather than 'C' so Cut and Copy do not share a mnemonic in the same menu.
        cut = withMnemonic(new JMenuItem("Cut"), java.awt.event.KeyEvent.VK_T);
        paste = withMnemonic(new JMenuItem("Paste"), java.awt.event.KeyEvent.VK_P);
        aboutDeveloper = withMnemonic(new JMenuItem("About Developer"), java.awt.event.KeyEvent.VK_A);

        // Assemble the menus
        topMenuHolder.add(topMenu, BorderLayout.CENTER);
        topMenu.add(file);
        topMenu.add(edit);
        topMenu.add(about);
        file.add(open);
        file.add(save);
        file.addSeparator();
        file.add(exit);
        edit.add(cut);
        edit.add(copy);
        edit.add(paste);
        about.add(aboutDeveloper);

        // Editor and tokenize button
        editor = new JEditorPane();
        tokenize = new JButton("Tokenize");

        // Lay out the frame
        this.add(topMenuHolder, BorderLayout.NORTH);
        this.add(new JScrollPane(editor), BorderLayout.CENTER);
        this.add(tokenize, BorderLayout.SOUTH);
        this.setSize(640, 480);
        this.setTitle("Code Tokenizer");
        this.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);

        // Action listeners
        open.addActionListener(new OnClickOpen());
        save.addActionListener(new OnClickSave());
        exit.addActionListener(e -> System.exit(0));
        cut.addActionListener(e -> editor.cut());
        copy.addActionListener(e -> editor.copy());
        paste.addActionListener(e -> editor.paste());
        aboutDeveloper.addActionListener(new OnClickAboutDeveloper());
        tokenize.addActionListener(new OnClickTokenize());

        // Shown last so the window never appears before its listeners are attached.
        this.setVisible(true);
    }

    /** Sets {@code keyCode} as the mnemonic of {@code item} and returns the same item. */
    private static <T extends JMenuItem> T withMnemonic(T item, int keyCode) {
        item.setMnemonic(keyCode);
        return item;
    }

    /** Prints the two blank lines that frame every debug section. */
    private static void debugGap() {
        System.out.println();
        System.out.println();
    }

    /** Lets the user pick a {@code .java} file and loads it into the editor. */
    private class OnClickOpen implements ActionListener {
        @Override
        public void actionPerformed(ActionEvent event) {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.setDialogTitle("Open Java Code File");
            fileChooser.setAcceptAllFileFilterUsed(false);
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Java Code Files", "java"));
            if (fileChooser.showOpenDialog(CodeTokenizer.this) != JFileChooser.APPROVE_OPTION) {
                return;
            }
            targetFile = fileChooser.getSelectedFile();
            if (targetFile == null) {
                return;
            }
            try {
                editor.setPage(targetFile.toURI().toURL());
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Lets the user pick a destination and writes the editor text to it, appending {@code .java} if missing. */
    private class OnClickSave implements ActionListener {
        @Override
        public void actionPerformed(ActionEvent event) {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.setDialogTitle("Save Java Code File");
            fileChooser.setAcceptAllFileFilterUsed(false);
            // The extension is given without the leading dot; ".java" would never match a file.
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Java Code Files", "java"));
            if (fileChooser.showSaveDialog(CodeTokenizer.this) != JFileChooser.APPROVE_OPTION) {
                return;
            }
            targetFile = fileChooser.getSelectedFile();
            if (targetFile == null) {
                return;
            }
            if (!targetFile.getName().endsWith(".java")) {
                try {
                    targetFile = new File(targetFile.getCanonicalPath() + ".java");
                } catch (IOException e) {
                    e.printStackTrace();
                    return;
                }
            }
            // try-with-resources closes the writer even when writing fails.
            try (PrintWriter writer = new PrintWriter(targetFile)) {
                writer.print(editor.getText());
                // PrintWriter swallows I/O errors; checkError() flushes and reports them.
                if (writer.checkError()) {
                    System.err.println("[ERROR]: COULD NOT WRITE " + targetFile);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Opens the "About Developer" dialog. */
    private class OnClickAboutDeveloper implements ActionListener {
        @Override
        public void actionPerformed(ActionEvent e) {
            Dialog dialog = new AboutDeveloper();
            dialog.pack();
            dialog.setVisible(true);
        }
    }

    /**
     * Runs the tokenizer pipeline over the current editor text:
     * remove comments, split into lines, extract tokens, describe tokens.
     */
    private class OnClickTokenize implements ActionListener {
        private String text;
        private String[] lines;
        private ArrayList<Token> tokens;

        @Override
        public void actionPerformed(ActionEvent e) {
            try {
                removeComments();
                extractLines();
                extractTokens();
                describeTokens();
            } catch (RuntimeException ex) {
                // Keep the UI alive and report the failure, as the per-stage handlers used to.
                ex.printStackTrace();
            }
        }

        /**
         * Copies the editor text into {@code text} with every comment removed.
         * Line breaks inside a removed comment are kept so that the line
         * numbers of the following tokens still match the original text.
         *
         * <p>NOTE(review): a "//" or "/*" inside a string literal is still
         * treated as the start of a comment.
         */
        private void removeComments() {
            String source = editor.getText();
            if (source == null) {
                source = "";
            }
            Matcher comments = COMMENT_PATTERN.matcher(source);
            StringBuffer stripped = new StringBuffer(source.length());
            while (comments.find()) {
                String lineBreaks = comments.group().replaceAll("[^\\r\\n]", "");
                comments.appendReplacement(stripped, Matcher.quoteReplacement(lineBreaks));
            }
            comments.appendTail(stripped);
            text = stripped.toString();

            System.out.println("[INFO]: COMMENTS REMOVED.");
            if (DEBUG_ENABLED) {
                debugGap();
                System.out.println("Text After Removing Comments:");
                System.out.println();
                System.out.println(text);
                debugGap();
            }
        }

        /** Splits {@code text} into {@code lines}, accepting CRLF, CR and LF line endings. */
        private void extractLines() {
            System.out.println("[INFO]: EXTRACTING LINES...");
            lines = text.split("\r\n|\r|\n");
            System.out.println("[INFO]: LINES EXTRACTED.");
            if (DEBUG_ENABLED) {
                debugGap();
                System.out.println("Lines:");
                System.out.println();
                for (int i = 0; i < lines.length; i++) {
                    System.out.println("[" + (i + 1) + "]: " + lines[i]);
                }
                debugGap();
            }
        }

        /** Fills {@code tokens} with every token found in {@code lines}, tagged with its 1-based line number. */
        private void extractTokens() {
            System.out.println("[INFO]: EXTRACTING TOKENS...");
            tokens = new ArrayList<>();
            for (int i = 0; i < lines.length; i++) {
                Matcher found = TOKEN_PATTERN.matcher(lines[i]);
                while (found.find()) {
                    tokens.add(new Token(i + 1, found.group()));
                }
            }
            System.out.println("[INFO]: TOKENS EXTRACTED.");
            if (DEBUG_ENABLED) {
                debugGap();
                System.out.println("Tokens:");
                System.out.println();
                for (Token token : tokens) {
                    System.out.println("[Line: " + token.getLine() + "]: " + token.getToken());
                }
                debugGap();
            }
        }

        /** Stores the description returned by {@code ParseCheck.getDescription} on every token. */
        private void describeTokens() {
            System.out.println("[INFO]: DESCRIBING TOKENS...");
            for (Token token : tokens) {
                token.setDescription(ParseCheck.getDescription(token.getToken()));
            }
            System.out.println("[INFO]: TOKENS DESCRIBED.");
            if (DEBUG_ENABLED) {
                debugGap();
                System.out.println("Token Descriptions:");
                System.out.println();
                for (Token token : tokens) {
                    System.out.println("[Line: " + token.getLine() + "]: Token = " + token.getToken()
                            + "\tDescription: " + token.getDescription());
                }
            }
        }
    }

    /** Starts the application; the window is created on the event dispatch thread. */
    public static void main(String[] args) {
        javax.swing.SwingUtilities.invokeLater(CodeTokenizer::new);
    }
}
ParseCheck类
/**
 * Classifies a single, already extracted token of Java source text.
 *
 * <p>Every {@code isXxx} method tests the whole string against one token
 * category; {@link #getDescription(String)} tries the categories in a fixed
 * order and returns a human-readable label, or {@code "Invalid Token"} when
 * none of them matches.
 */
public class ParseCheck {
    /** Java keywords and reserved literals. MUST stay sorted: {@link #isKeyWord} uses binary search. */
    private static final String[] KEYWORDS = {"abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class", "const", "continue", "default", "do", "double", "else", "enum", "extends", "false", "final", "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", "int", "interface", "long", "native", "new", "null", "package", "private", "protected", "public", "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this", "throw", "throws", "transient", "true", "try", "void", "volatile", "while"};

    /**
     * @param str token text; {@code null} is treated as "not a keyword"
     * @return {@code true} if {@code str} is one of the listed keywords
     */
    public static boolean isKeyWord(String str){
        // binarySearch returns the index when found (0 is a valid index) and a negative value otherwise.
        return str != null && Arrays.binarySearch(KEYWORDS, str) >= 0;
    }

    /**
     * @return {@code true} if {@code str} starts with a letter, '_' or '$' and
     *         continues with letters, digits, '_' or '$'
     */
    public static boolean isIdentifier(String str){
        return str.matches("[A-Za-z_$][A-Za-z0-9_$]*");
    }

    /** @return {@code true} if {@code str} is exactly {@code =} */
    public static boolean isAssignmentOperator(String str){
        return str.matches("=");
    }

    /** @return {@code true} if {@code str} is one of {@code += -= *= /= %=} */
    public static boolean isCompoundAssignmentOperator(String str){
        return str.matches("[+\\-*/%]=");
    }

    /** @return {@code true} if {@code str} is {@code ++}, {@code --} or {@code !} */
    public static boolean isUnaryOperator(String str){
        return str.matches("\\+\\+|\\-\\-|\\!");
    }

    /** @return {@code true} if {@code str} is one of {@code + - * / %} */
    public static boolean isArithmeticOperator(String str){
        return str.matches("[\\+\\-\\*/%]");
    }

    /** @return {@code true} if {@code str} is one of {@code < > <= >= == !=} */
    public static boolean isRelationalOperator(String str){
        return str.matches("[<>=!][=]|[<>]");
    }

    /** @return {@code true} if {@code str} is two characters, each {@code &} or {@code |} */
    public static boolean isConditionalOperator(String str){
        return str.matches("[\\&\\|]{2}");
    }

    /** @return {@code true} if {@code str} is a single bracket, dot, comma, semicolon, colon or quote character */
    public static boolean isSymbol(String str){
        return str.matches("[\\(\\)\\{\\}\\[\\]\\.\\,\\;\\:\\'\\\"]");
    }

    /** @return {@code true} if {@code str} is a double-quoted string whose backslashes form listed escapes only */
    public static boolean isStringLiteral(String str){
        return str.matches("\\\"(\\\\[tbnrf01234567'\"\\\\]|[^\"\\\\])*\\\"");
    }

    /**
     * @return {@code true} if {@code str} is a single-quoted single character
     *         or one of the escapes accepted inside string literals
     */
    public static boolean isCharacterLiteral(String str){
        // The dot must NOT be escaped here: an escaped dot only matches the literal text '.'.
        return str.matches("'(\\\\[tbnrf01234567'\"\\\\]|[^'\\\\])'");
    }

    /** @return {@code true} if {@code str} is digits, a dot and digits, with an optional leading minus */
    public static boolean isRealNumber(String str){
        return str.matches("-?\\d+\\.\\d+");
    }

    /** @return {@code true} if {@code str} is one or more digits with an optional leading minus */
    public static boolean isInteger(String str){
        return str.matches("-?\\d+");
    }

    /**
     * Returns a label for {@code token}.
     *
     * <p>Keywords are tested before identifiers because every keyword also has
     * the shape of an identifier.
     *
     * @param token token text; {@code null} yields {@code "Invalid Token"}
     * @return the category label, or {@code "Invalid Token"} if no category matches
     */
    public static String getDescription(String token){
        if(token == null) return "Invalid Token";
        if(isKeyWord(token)) return "Keyword";
        if(isIdentifier(token)) return "Identifier";
        if(isAssignmentOperator(token)) return "Assignment Operator";
        if(isCompoundAssignmentOperator(token)) return "Compound Assignment Operator";
        if(isUnaryOperator(token)){
            switch (token){
                case "++":
                    return "Increment Operator";
                case "--":
                    return "Decrement Operator";
                default:
                    return "Other Unary Operator";
            }
        }
        if(isArithmeticOperator(token)){
            switch (token){
                case "+":
                    return "Plus Operator";
                case "-":
                    return "Minus Operator";
                case "*":
                    return "Multiply Operator";
                case "/":
                    return "Divide Operator";
                case "%":
                    return "Mod Operator";
                default:
                    return "Other Arithmatic Operator";
            }
        }
        if(isRelationalOperator(token)){
            switch (token){
                case "<":
                    return "Less Than Operator";
                case "<=":
                    return "Less Than Or Equal To Operator";
                case ">":
                    return "Greater Than Operator";
                case ">=":
                    return "Greater Than Or Equal Operator";
                case "==":
                    return "Equal To Operator";
                case "!=":
                    return "Not Equal To Operator";
                default:
                    return "Other Relational Operator";
            }
        }
        if(isConditionalOperator(token)){
            switch (token){
                case "&&":
                    return "AND Operator";
                case "||":
                    return "OR Operator";
                default:
                    return "Other Conditional Operator";
            }
        }
        if(isSymbol(token)){
            switch (token){
                case "(":
                    return "Parenthesis Start";
                case ")":
                    return "Parenthesis End";
                case "{":
                    return "Delimiter Start";
                case "}":
                    return "Delimiter End";
                case "[":
                    return "Subscript Start";
                case "]":
                    return "Subscript End";
                case ".":
                    return "Dot Operator";
                case ",":
                    return "Comma Operator";
                case ";":
                    return "Semicolon";
                case ":":
                    return "Colon";
                case "'":
                    return "Single Quotation";
                case "\"":
                    return "Double Quotation";
            }
        }
        if(isStringLiteral(token)) return "String Literal";
        if(isCharacterLiteral(token)) return "Character Literal";
        if(isRealNumber(token)) return "Real Number";
        if(isInteger(token)) return "Integer";
        return "Invalid Token";
    }
}
Token类
/**
 * A piece of text cut out of the analysed source, together with the line value
 * it was created with, an optional description and a validity flag.
 *
 * <p>The line and text are fixed at construction. The description starts as
 * {@code null} and the validity flag starts as {@code false}; both are changed
 * through their setters.
 */
public class Token {
    /** Text of the token as it was extracted. */
    private final String token;
    /** Line value passed to the constructor. */
    private final int line;
    /** Label assigned later through {@link #setDescription(String)}; {@code null} until then. */
    private String description;
    /** Validity flag; a new token is not valid until {@link #setValid(boolean)} says so. */
    private boolean valid = false;

    /**
     * @param line  line value to associate with the token
     * @param token text of the token
     */
    public Token(int line, String token) {
        this.token = token;
        this.line = line;
    }

    /** @return the line value given at construction */
    public int getLine() {
        return this.line;
    }

    /** @return the token text given at construction */
    public String getToken() {
        return this.token;
    }

    /** @return the current description, or {@code null} if none was set */
    public String getDescription() {
        return this.description;
    }

    /** @param description new description for this token */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the current validity flag */
    public boolean isValid() {
        return this.valid;
    }

    /** @param valid new validity flag */
    public void setValid(boolean valid) {
        this.valid = valid;
    }
}
注意:我不能使用任何现成的库,因为我的学校项目不允许使用内置库来对Java代码进行令牌化/解析。