如何将浮点解析功能添加到C词法分析器?

时间:2014-03-25 01:25:17

标签: c lexical-analysis

我正在尝试为这个简单的词法分析器添加浮点功能,我用C编写了C语言(除此之外)。我有一些关于如何做到这一点的想法,但它们都是不完整的解决方案,主要是在Parse整数文字中加入if语句,但由于while语句,它仍然会停止并将句点计算为句点。我想在while语句中添加一个OR,但我不完全确定如何指定它只是一个句点。这是代码:

    /* front.c */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <conio.h>
/*Global Declarations */
/*variables*/
int charClass;
char lexeme [100];
char nextChar;
int lexLen;
int token;
int nextToken;
FILE *in_fp, *fopen();

/*function declarations*/
void addChar();
void getChar();
void getNonBlank();
int lex();

/*Character classes */
#define LETTER 0
#define DIGIT 1
#define UNKNOWN 99

/*token codes*/
#define INT_LIT 10
#define FLOAT
#define IDENT 11
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
#define MOD_OP 27
#define SEMICOL 28
#define COMMA 29
#define EXCLAMATION_MARK 30
#define AT_SIGN 31
#define POUND_SIGN 32
#define DOLLAR_SIGN 33
#define CARAT_SIGN 34
#define AMPERSAND 35
#define PERIOD_MARK 36
#define LESSTHAN_SIGN 37
#define GREATERTHAN_SIGN 38

#define QUESTION_MARK 39
#define LEFT_SQUAREBRACKET 40
#define RIGHT_SQUAREBRACKET 41
#define LEFT_CURLYBRACKET 42
#define RIGHT_CURLYBRACKET 43
#define BACKSLASH 44
#define VERTICALBAR 45

#define SINGLE_QUOTE 46

#define DOUBLE_QUOTE 47
#define COLON 48

#define UNDERSCORE 49
#define TILDE 50
#define GRAVE_ACCENT 51




/*********************/
/*main driver */
main()
{
/*Open the input data file and process its contents*/
    if ((in_fp = fopen("front.in", "r")) == NULL)
        printf("ERROR - cannot open front.in \n");
    else
    {
        getChar();
        do
        {
            lex();
        } while (nextToken != EOF);
    }
}

/***************************/
/*lookup - a function to lookup operators and parentheses
        and return the token */
int lookup(char ch)
{
    switch (ch)
    {
        case '=':
            addChar();
            nextToken = ASSIGN_OP
            break;

        case '(':
            addChar();
            nextToken = LEFT_PAREN;
            break;

        case ')':
            addChar();
            nextToken = RIGHT_PAREN;
            break;

        case '+':
            addChar();
            nextToken = ADD_OP;
            break;

        case '-':
            addChar();
            nextToken = SUB_OP;
            break;

        case '*':
            addChar();
            nextToken = MULT_OP;
            break;

        case '/':
            addChar();
            nextToken = DIV_OP;
            break;

        case '%':
            addChar();
            nextToken = MOD_OP;
            break;

        case ';':
            addChar();
            nextToken = SEMICOL;
            break;

        case ':':
            addChar();
            nextToken = COLON;
            break;

        case '"':
            addChar();
            nextToken = DOUBLE_QUOTE;
            break;

        case ',':
            addChar();
            nextToken = COMMA;
            break;

        case '.':
            addChar();
            nextToken = PERIOD_MARK;
            break;

        case '!':
            addChar();
            nextToken = EXCLAMATION_MARK;
            break;

        case '@':
            addChar();
            nextToken = AT_SIGN;
            break;

        case '#':
            addChar();
            nextToken = POUND_SIGN;
            break;

        case '$':
            addChar();
            nextToken = DOLLAR_SIGN;
            break;

        case '^':
            addChar();
            nextToken = CARAT_SIGN;
            break;

        case '&':
            addChar();
            nextToken = AMPERSAND;
            break;

        case '<':
            addChar();
            nextToken = LESSTHAN_SIGN;
            break;

        case '>':
            addChar();
            nextToken = GREATERTHAN_SIGN;
            break;

        case '?':
            addChar();
            nextToken = QUESTION_MARK;
            break;

        case '[':
            addChar();
            nextToken = LEFT_SQUAREBRACKET;
            break;

        case ']':
            addChar();
            nextToken = RIGHT_SQUAREBRACKET;
            break;

        case '{':
            addChar();
            nextToken = LEFT_CURLYBRACKET;
            break;

        case '}':
            addChar();
            nextToken = RIGHT_CURLYBRACKET;
            break;

        case '\'':
            addChar();
            nextToken = SINGLE_QUOTE;
            break;*

        case '|':
            addChar();
            nextToken = VERTICALBAR;
            break;

        case '_':
            addChar();
            nextToken = UNDERSCORE;
            break;

        case '~':
            addChar();
            nextToken = TILDE;
            break;

        case '`':
            addChar();
            nextToken = GRAVE_ACCENT;
            break;

        case '\\':
            addChar();
            nextToken = BACKSLASH;
            break;

        default:
            addChar();
            nextToken = EOF;
            break;
    }
    return nextToken;
}

/*****************************/
/* addChar = a function to add nextChar to lexeme */
void addChar()
{
    if (lexLen <= 98)
    {
        lexeme[lexLen++] = nextChar;
        lexeme[lexLen] = 0;
    }
    else
        printf("Error - lexeme is too long \n");
}

/**********************************/
/* getChar- a function to get the next character of
            input and determine its character class */
void getChar()
{
    if ((nextChar = getc(in_fp)) != EOF)
    {
        if (isalpha(nextChar))
            charClass = LETTER;
        else if (isdigit(nextChar))
                charClass = DIGIT;
            else charClass = UNKNOWN;
    }
    else
        charClass = EOF;
}

/********************************************/
/* getNonBlank - a function to call getChar until it
                    returns a non-whitespace character */
void getNonBlank()
{
    while (isspace(nextChar))
        getChar();
}

/*******************************/
/* lex - a simple lexical analyzer for arithmetic
        expressions */
int lex()
{
    lexLen = 0;
    getNonBlank();
    switch (charClass)
    {
    /*Parse identifiers */
        case LETTER:
            addChar();
            getChar();
            while (charClass == LETTER || charClass == DIGIT)
            {
                addChar();
                getChar();
            }
        nextToken = IDENT;
        break;


/*Parse integer literals and ?Floats?*/
        case DIGIT:
            addChar();
            getChar();
            while (charClass == DIGIT)
            {
                addChar();
                getChar();
            }
            nextToken = INT_LIT;
            break;

/*Parentheses and operators*/
        case UNKNOWN:
            lookup(nextChar);
            getChar();
            break;

/*EOF*/
        case EOF:
            nextToken = EOF;
            lexeme[0] = 'E';
            lexeme[1] = 'O';
            lexeme[2] = 'F';
            lexeme[3] = 0;
            break;
    }/*End of switch*/
    printf("Next token is:  %d, Next lexeme is %s\n",
        nextToken, lexeme);
    return nextToken;
} /*End of function lex*/

我在想,如果我使用了类似&#34; charClass.ch ==&#39;。&#39;&#34;在while语句中作为&#34; charClass == DIGIT&#34;的扩展。通过|| (或),但我想我可能会混淆另一种语言或做错了。我可能不会,但目前很难正确测试这个程序。

以下是我认为需要更改以获得浮动的特定部分:

    /*Parse integer literals and ?Floats?*/
    case DIGIT:
        addChar();
        getChar();
        while (charClass == DIGIT)
        {
            addChar();
            getChar();
        }
        nextToken = INT_LIT;
        break;

1 个答案:

答案 0 :(得分:1)

/*Parse integer literals and ?Floats?*/
    case DIGIT:
        addChar();
        getChar();
        while (charClass == DIGIT)
        {
            addChar();
            getChar();
        }

此时您已经知道nextChar是什么。如果它是一个点,请写一些代码来使用它以及所有后面的数字并将nextToken设置为FLOAT_LIT.否则会落到这一点:

        nextToken = INT_LIT;
        break;