我写了这个程序,用来对一个包含代码的文件做词法分析(tokenize),把每个标识符、关键字、数字和符号转换成对应的数字标记。我遇到的问题是:它能为每一行的第一个单词给出正确的标记,但该行后面的每个标记都被当作标识符。下面是我的代码,我认为问题出在 tokenize 函数中:
/**
 * Scans text line by line and prints a numeric code for every keyword,
 * identifier, number and symbol it recognises.
 *
 * Codes printed by this class: keywords 0-6, symbols 7-26 (17 is never
 * printed), identifiers 27, numbers 28.
 */
public class cmmLex {
/**
 * Tells whether {@code b} is an ASCII letter (either case) or an underscore.
 *
 * @param b character to test
 * @return true when {@code b} appears in the letter table below
 */
public static boolean isLetter(char b){
char[] letters_ = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','A','B','C','D',
'E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'};
for (int i = 0; i < letters_.length; i++) {
if(b == letters_[i])
return true;
}
return false;
}
/**
 * Tells whether {@code b} is one of the decimal digits '0'..'9'.
 *
 * @param b character to test
 * @return true when {@code b} is a digit
 */
public static boolean isNumber(char b){
char[] numbers = {'0','1','2','3','4','5','6','7','8','9'};
for (int i = 0; i < numbers.length; i++) {
if(b == numbers[i])
return true;
}
return false;
}
/**
 * Tells whether the buffer's content is exactly one of the seven keywords.
 *
 * @param str buffer holding a candidate word
 * @return true when the content equals a keyword (case-sensitive)
 */
public static boolean isKeyword(StringBuffer str){
String[] keywords = {"int", "double", "if", "while","return","void","else"};
for (int i = 0; i < keywords.length; i++) {
if (keywords[i].equals(str.toString()))
return true;
}
return false;
}
/**
 * Tells whether {@code a} is in the symbol table.
 *
 * NOTE(review): ';' is not in the table even though tokenize has a
 * {@code case ';'} for it.
 *
 * @param a character to test
 * @return true when {@code a} is a listed symbol
 */
public static boolean isSymbol(char a){
char[] symbols = {'+','-','*','/','<','>','!','=',',','.','(',')','[',']','{','}'};
for (int i = 0; i < symbols.length; i++) {
if(a == symbols[i])
return true;
}
return false;
}
/**
 * Prints the code for a word: 0-6 for a keyword, 27 for anything else.
 *
 * @param string buffer holding the word to classify
 */
public static void lexMe(StringBuffer string)
{
if(isKeyword(string)){
switch(string.toString()){
case "double":
System.out.print("0 ");
break;
case "else":
System.out.print("1 ");
break;
case "if":
System.out.print("2 ");
break;
case "int":
System.out.print("3 ");
break;
case "return":
System.out.print("4 ");
break;
case "void":
System.out.print("5 ");
break;
case "while":
System.out.print("6 ");
break;
}
}else{
// Not a keyword: reported as an identifier.
System.out.print("27 ");
}
}
/**
 * Prints the token codes for one line, followed by a line break.
 *
 * NOTE(review): as written, only the first word of a line is classified
 * correctly. For "int fact(int x){" the output is "3 27 27 27 27 27";
 * see the notes inside the identifier branch.
 *
 * @param line one line of input text
 */
public static void tokenize(String line){
// Accumulates the characters of the word or number being scanned.
StringBuffer consumed = new StringBuffer();
outerloop:
for (int i = 0; i < line.length(); i++) {
char ch = line.charAt(i);
// A word may only start while the buffer is empty.
if(isLetter(ch) && consumed.length() == 0){
consumed.append(line.charAt(i));
for (int j = i+1; j < line.length(); j++) {
ch = line.charAt(j);
if(isLetter(ch) || isNumber(ch)){
consumed.append(ch);
}else{
//call lexme to tokenize string
lexMe(consumed);
// NOTE(review): the buffer reset below is commented out and there is no
// break, so this inner loop keeps running to the end of the line. Every
// later word is appended to the same buffer, and every later delimiter
// (spaces and symbols alike) triggers another lexMe call on that growing
// text, which is never a keyword.
//consumed.setLength(0);
// The outer loop's i++ then steps past position j, so the delimiter
// itself is never examined by the symbol branch.
i = j;
}
}
// NOTE(review): a word that runs to the end of the line never reaches the
// else branch above, so it is never printed.
}else if(isNumber(ch) && consumed.length() == 0){
consumed.append(line.charAt(i) );
for (int j = i+1; j < line.length(); j++) {
ch = line.charAt(j);
// Digits and dots both extend the number.
if(isNumber(ch) || line.charAt(j) == '.'){
consumed.append(ch);
}else{
// NOTE(review): same pattern as the identifier branch: no break and no
// buffer reset, so 28 is printed for every later non-numeric character.
System.out.print("28 ");
i = j;
}
}
}else if (isSymbol(ch)){
switch(ch){
case '+':
System.out.print("7 ");
break;
case '-':
System.out.print("8 ");
break;
case '*':
// NOTE(review): charAt(i-1) throws StringIndexOutOfBoundsException when
// '*' is the first character of the line.
if(line.charAt(i-1) == '/'){
// "/*" seen: the rest of the line is abandoned.
break outerloop;
}else{
// println (not print) ends the output line right after this token.
System.out.println("9 ");
}
break;
case '/':
// NOTE(review): charAt(i+1) throws StringIndexOutOfBoundsException when
// '/' is the last character of the line.
if(line.charAt(i+1) == '/')
// "//" seen: the rest of the line is abandoned.
break outerloop;
else if((ch = line.charAt(i+1)) == '*'){
consumed.append(ch);
for (int j = i; j < line.length(); j++) {
ch = line.charAt(j);
if(ch == '*'){
// NOTE(review): ch was just found equal to '*', so this test can never be
// true; the loop therefore never detects the end of a comment.
if(ch == '/'){
break outerloop;
}
}else{
consumed.append(ch);
}
}
}else{
// println (not print) ends the output line right after this token.
System.out.println("10 ");
}
break;
case '<':
// NOTE(review): charAt(i+1) throws at the end of the line. Also, i is not
// advanced past the '=', so the next iteration prints 15 for it as well.
if(line.charAt(i+1) == '='){
System.out.print("12 ");
break;
}
System.out.print("11 ");
break;
case '>':
// NOTE(review): same lookahead caveats as '<'.
if(line.charAt(i+1) == '='){
System.out.print("14 ");
break;
}
System.out.print("13 ");
break;
case '!':
// NOTE(review): same lookahead caveats as '<'; a '!' that is not followed
// by '=' prints nothing.
if(line.charAt(i+1) == '='){
System.out.print("16 ");
break;
}
break;
case '=':
System.out.print("15 ");
break;
case ';':
// NOTE(review): unreachable, because isSymbol(';') is false and the
// enclosing branch is never entered for ';'.
System.out.print("18 ");
break;
case ',':
System.out.print("19 ");
break;
case '.':
System.out.print("20 ");
break;
case '(':
System.out.print("21 ");
break;
case ')':
System.out.print("22 ");
break;
case '[':
System.out.print("23 ");
break;
case ']':
System.out.print("24 ");
break;
case '{':
System.out.print("25 ");
break;
case '}':
System.out.print("26 ");
break;
}
}
}
// Terminates the output line for this input line.
System.out.println("");
}
/**
 * Reads src\testCode.txt line by line and tokenizes each line.
 *
 * @param args unused
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
File file = new File("src\\testCode.txt");
// NOTE(review): testCode is declared but never used.
String testCode;
try {
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
tokenize(line);
}
scanner.close();
}catch (FileNotFoundException e) {
// NOTE(review): a missing input file is silently ignored here.
}
}
}
我遇到的另一个问题是无法正确忽略注释块。我尝试在遇到“/*”并跳出循环时设置一个布尔标志,表示当前处于注释块中;在该标志为真期间继续扫描后面的行,直到遇到“*/”再把标志设为 false。但这没有奏效。有什么想法吗?
这是我文件的第一行:
int fact(int x){
应该打印出这一行:
3 27 21 3 27 22 25
以及它目前如何出现:
3 27 27 27 27 27
也许我没有正确处理空格?
答案 0 :(得分:1)
你的 lexMe 方法中的 switch 检查的是一个字符串;处理完第一个单词之后,这个字符串里累积的内容比应有的更多。
使用调试器查看它,或者输入该值的调试打印,您将看到问题所在。
问题似乎出在您发布的代码中 consumed.setLength(0); 这一行被注释掉了。
即使恢复这一行,问题仍然存在:处理完“fact”字符串之后,程序仍停留在内层 for 循环中,而该循环位于条件 if(isLetter(ch) && consumed.length() == 0) 的分支内;此时它本应回到外层重新检查该条件。
我建议使用调试器来了解您的代码当前正在做什么,然后进行一些重大的重构。
注意:
我刚看了另一个答案,它通过添加 break 语句纠正了这些问题(这是我的推测,我没有实际运行)。
但我仍然强烈建议重新设计整个程序,因为嵌套的循环和条件再加上 break 会让代码变得非常混乱。
答案 1 :(得分:1)
您在tokenize()循环中遇到问题。以下是代码的更正版本:
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Scanner;
/**
 * Line-oriented lexer that prints a numeric code for every keyword,
 * identifier, number and symbol it recognises.
 *
 * <p>Codes: keywords 0-6 ({@code double, else, if, int, return, void, while}),
 * symbols 7-26, identifiers 27, numbers 28. Each input line produces one
 * output line. Line comments and block comments are skipped; a block comment
 * may span several lines, which is why the class keeps a small piece of state
 * between calls to {@link #tokenize(String)}.
 */
public class cmmLex {

    /** Every character that can start a symbol token or a comment. */
    private static final String SYMBOLS = "+-*/<>!=;,.()[]{}";

    /** True while the scanner is inside a block comment opened on an earlier line. */
    private static boolean inBlockComment = false;

    /**
     * Tells whether {@code b} is an ASCII letter (either case) or an underscore.
     *
     * @param b character to test
     * @return true for {@code a-z}, {@code A-Z} and {@code _}
     */
    public static boolean isLetter(char b) {
        return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_';
    }

    /**
     * Tells whether {@code b} is a decimal digit.
     *
     * @param b character to test
     * @return true for {@code 0-9}
     */
    public static boolean isNumber(char b) {
        return b >= '0' && b <= '9';
    }

    /**
     * Tells whether the buffer holds exactly one of the keywords.
     *
     * @param str buffer holding a candidate word
     * @return true when the content is a keyword (case-sensitive)
     */
    public static boolean isKeyword(StringBuffer str) {
        return keywordCode(str.toString()) >= 0;
    }

    /**
     * Tells whether {@code a} is a symbol character. The semicolon is included
     * so that its token code (18) can actually be produced.
     *
     * @param a character to test
     * @return true when {@code a} is a recognised symbol character
     */
    public static boolean isSymbol(char a) {
        return SYMBOLS.indexOf(a) >= 0;
    }

    /**
     * Prints the code for a word: 0-6 for a keyword, 27 for an identifier.
     *
     * @param string buffer holding the complete word
     */
    public static void lexMe(StringBuffer string) {
        int code = keywordCode(string.toString());
        System.out.print((code >= 0 ? code : 27) + " ");
    }

    /**
     * Prints the token codes for one line, followed by a line break.
     *
     * <p>If a block comment is left open at the end of the line, the following
     * calls skip text until the closing delimiter is found.
     *
     * @param line one line of input text
     */
    public static void tokenize(String line) {
        final int length = line.length();
        int pos = 0;
        while (pos < length) {
            if (inBlockComment) {
                int close = line.indexOf("*/", pos);
                if (close < 0) {
                    break; // the comment continues on the next line
                }
                inBlockComment = false;
                pos = close + 2;
                continue;
            }

            char ch = line.charAt(pos);
            if (isLetter(ch)) {
                int start = pos;
                while (pos < length && (isLetter(line.charAt(pos)) || isNumber(line.charAt(pos)))) {
                    pos++;
                }
                // Emitted here so that a word ending at the end of the line is not lost.
                lexMe(new StringBuffer(line.substring(start, pos)));
            } else if (isNumber(ch)) {
                // Digits and dots both extend a number, as in the original scanner.
                while (pos < length && (isNumber(line.charAt(pos)) || line.charAt(pos) == '.')) {
                    pos++;
                }
                System.out.print("28 ");
            } else if (isSymbol(ch)) {
                // '\0' stands for "no next character" and matches no operator.
                char next = (pos + 1 < length) ? line.charAt(pos + 1) : '\0';
                if (ch == '/' && next == '/') {
                    break; // line comment: ignore the rest of the line
                }
                if (ch == '/' && next == '*') {
                    inBlockComment = true;
                    pos += 2;
                    continue;
                }
                pos += emitSymbol(ch, next);
            } else {
                pos++; // whitespace or a character the lexer does not know
            }
        }
        System.out.println("");
    }

    /**
     * Reads {@code src\testCode.txt} and tokenizes it line by line.
     *
     * @param args unused
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        File file = new File("src\\testCode.txt");
        inBlockComment = false;
        // try-with-resources closes the scanner even if tokenize throws.
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                tokenize(scanner.nextLine());
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open " + file.getPath() + ": " + e.getMessage());
        }
    }

    /** Returns the keyword's token code (0-6), or -1 when {@code word} is not a keyword. */
    private static int keywordCode(String word) {
        switch (word) {
            case "double": return 0;
            case "else":   return 1;
            case "if":     return 2;
            case "int":    return 3;
            case "return": return 4;
            case "void":   return 5;
            case "while":  return 6;
            default:       return -1;
        }
    }

    /**
     * Prints the code for the symbol starting with {@code ch} and returns how
     * many characters it used, so two-character operators are not scanned twice.
     */
    private static int emitSymbol(char ch, char next) {
        if (next == '=') {
            switch (ch) {
                case '<': System.out.print("12 "); return 2;
                case '>': System.out.print("14 "); return 2;
                case '!': System.out.print("16 "); return 2;
                default:  break;
            }
        }
        int code = singleSymbolCode(ch);
        if (code >= 0) {
            System.out.print(code + " ");
        }
        return 1;
    }

    /**
     * Returns the code of a one-character symbol, or -1 when it has none.
     *
     * NOTE(review): code 17 is unused and a lone '!' has no code; "==" is
     * reported as two '=' tokens. Presumably one of these was meant to be 17;
     * confirm against the token table of the assignment.
     */
    private static int singleSymbolCode(char ch) {
        switch (ch) {
            case '+': return 7;
            case '-': return 8;
            case '*': return 9;
            case '/': return 10;
            case '<': return 11;
            case '>': return 13;
            case '=': return 15;
            case ';': return 18;
            case ',': return 19;
            case '.': return 20;
            case '(': return 21;
            case ')': return 22;
            case '[': return 23;
            case ']': return 24;
            case '{': return 25;
            case '}': return 26;
            default:  return -1;
        }
    }
}
答案 2 :(得分:0)
手写词法分析器总是很难编写和调试。我建议您使用更高级别的工具来完成这项工作,例如 JLex 或 JFlex。这会为您省去很多麻烦。