Question

因此，我正在开发一个小的编译器程序的项目，但是在继续进行其他部分之前，我很难让词法分析器在'\ BEGIN'之后输出任何内容，然后我对其进行了调试，似乎值卡在一个循环中，条件是说下一个字符始终是换行符。是因为我还没有将模式匹配添加到已定义的令牌中？

这是代码

import java.util

//import com.sun.javafx.fxml.expression.Expression.Parser.Token
/*Lexical analyzer will be responsible for the following:
- finds the lexemes
- Checks each given character determining the tokens
* */
class MyLexicalAnalyzer extends LexicalAnalyzer {
  //Array full of the keywords
//val SpecialCharacters = List(']', '#', '*', '+', '\\', '[', '(',')', "![", '=')

  val TEXT = "[a-z] | _ | 0-9 | [A-Z]:"
  private var sourceLine: String = null
  private val lexeme: Array[Char] = new Array[Char](999)
  private var nextChar: Char = 0
  private var lexLength: Int = 0
  private var position: Int = 0
  private val lexems: util.List[String] = new util.ArrayList[String]

  def start(line: String): Unit = {
    initializeLexems()
    sourceLine = line
    position = 0
    getChar()
    getNextToken()
  }
  // A helper method to determine if the current character is a space.

  private def isSpace(c: Char) = c == ' '

  //Defined and intialized tokens
    def initializeLexems(): Any =  {
   lexems.add("\\BEGIN")
    lexems.add("\\END")
   lexems.add("\\PARAB")
  lexems.add("\\DEF[")
    lexems.add("\\USE[")
  lexems.add("\\PARAE")
   lexems.add("\\TITLE[")
    lexems.add("]")
   lexems.add("[")
   lexems.add("\\")
   lexems.add("(")
  lexems.add(")")
    lexems.add("![")
   lexems.add("=")
    lexems.add("+")
  lexems.add("#")
  }
//val pattern = new regex("''").r
  def getNextToken() ={
    lexLength = 0
    // Ignore spaces and add the first character to the token
    getNonBlank()
    addChar()
    getChar()
    // Continue gathering characters for the token
    while ( {
      (nextChar != '\n') && (nextChar != ' ')
    }) {
      addChar()
      getChar()
    }
    // Convert the gathered character array token into a String
    val newToken: String = new String(lexeme)
    if (lookup(newToken.substring(0, lexLength)))
      MyCompiler.setCurrentToken(newToken.substring(0,lexLength))
  }
  // A helper method to get the next non-blank character.
  private def getNonBlank(): Unit = {
    while ( {
      isSpace(nextChar)
    }) getChar()
  }

  /*
    Method of function that adds the current character to the token
    after checking to make sure that length of the token isn't too
    long, a lexical error in this case.
   */
  def addChar(){

      if (lexLength <= 998) {
        lexeme({
          lexLength += 1; lexLength - 1
        }) = nextChar
        lexeme(lexLength) = 0
      }
    else
        System.out.println("LEXICAL ERROR - The found lexeme is too long!")
    if (!isSpace(nextChar))
      while ( {
      !isSpace(nextChar)
    })
        getChar()
    lexLength = 0
    getNonBlank()
    addChar()
  }

  //Reading from the file its obtaining the tokens
  def getChar() {
    if (position < sourceLine.length)
      nextChar = sourceLine.charAt ( {
        position += 1;
        position - 1
      })
    else nextChar = '\n'

  def lookup(candidateToken: String): Boolean ={
    if (!(lexems.contains(candidateToken))) {
      System.out.println("LEXICAL ERROR - '" + candidateToken + "' is not recognized.")
      return false
    }
    return true
  }
}

 else nextChar = '\n'<- this is where the condition goes after rendering the first character '\BEGIN' then just keeps outputting in the debug console as listed below.

这是读完'\ BEGIN'之后输出的调试控制台有人可以让我知道为什么吗？在我也多次介入之后，就会发生这种情况。

这里是使用词法分析器的驱动程序类

import scala.io.Source

object MyCompiler {
  //check the arguments
  //check file extensions
  //initialization
  //get first token
  //call start state
  var currentToken : String = ""

  def main(args: Array[String]): Unit = {
      val filename = args(0)
    //check if an input file provided
    if(args.length == 0) {
      //usage error
      println("USAGE ERROR:  Must provide an input file. ")
      System.exit(0)
    }
    if(!checkFileExtension(args(0))) {
      println("USAGE ERROR: Extension name is invalid make sure its .gtx ")
      System.exit(0)
    }

    val Scanner = new MyLexicalAnalyzer
    val Parser = new MySyntaxAnalyzer

    //getCurrentToken(Scanner.getNextToken())
    //Parser.gittex()
    for (line <- Source.fromFile(filename).getLines()){
      Scanner.start(line)
      println()
    }


    //.......
    //If it gets here, it is compiled
    //post processing

  }
  //checks the file extension if valid and ends with .gtx
  def checkFileExtension(filename : String) : Boolean =  filename.endsWith(".gtx")

  def getCurrentToken() : String = this.currentToken
  def setCurrentToken(t : String ) : Unit = this.currentToken = t
}

Answer 1

代码正在按预期的方式运行。第一行仅包含字符串\BEGIN，因此词法分析器将第一行的末尾视为“ \ n”，如以下方法所示：

def getChar() {
  if (position < sourceLine.length)
    nextChar = sourceLine.charAt ( {
      position += 1;
      position - 1
    })
  else nextChar = '\n'

但是，该方法正上方的注释未描述了该方法的实际作用。这可能暗示了您的困惑所在。如果评论说应该从文件中读取，但不是从文件中读取，则可能是您忘记了实现。

词法分析器没有得到下一个字符

1 个答案: