使用C的词法分析器

时间:2017-08-16 13:03:25

标签: c compiler-construction lexical-analysis

我已完成程序读取字符串然后转换为文件输入。对于单行,它正在获得输出。我还想在输出中显示字符,整数和浮点常量和符号。 对于多行或作为输入的c程序,最后打印的数组只是相互混合而生成另一个输入。有人知道这是怎么发生的吗?

#include<stdio.h>
#include<ctype.h> 
#include<string.h>
#include<stdlib.h>

#define KEYWORDS 32
#define OPERATORS 25
#define DEBUG_MODE 1

typedef enum {FALSE,TRUE} boolean;

char *trimString(char *str)
{
  char *end;

  // Trim leading space
  while(isspace((unsigned char)*str)) str++;

  if(*str == 0)  // All spaces?
    return str;

  // Trim trailing space
  end = str + strlen(str) - 1;
  while(end > str && isspace((unsigned char)*end)) end--;

  // Write new null terminator
  *(end+1) = 0;

  return str;
}

int main()
{
    char delimit[]=" \t\r\n\v\f(){};""";
    char str[80],filename[40],*token,*keyword[30],*identifier[30],*operator[30],*literal[30],tmp[80];
    int state=0,i=0,j=0,k=0,l=0,m=0,c,ch=0,isKeyword=0,isOperator=0;
    boolean isComment=FALSE;

    char* key[] = {"auto","break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long","register", "return", "short","signed", "sizeof", "static","struct", "switch","typedef","union", "unsigned", "void","volatile", "while"};

    char* operators[] = {"+", "-", "/", "*", "%", "=", "+=", "++", "--", "-=", "*=", "/=", "%=", "==", ">", "<", "!=", ">=", "<=", "&&", "||", "!", "<<", ">>", "sizeof"};

    printf("Enter the filename: ");
    gets(filename);

    FILE* file = fopen(filename, "r");

    if(file==NULL)
    {
        printf("\nError opening file with filename : \'%s\'!!\n\n",filename);
        exit(0);
    }
    while (fgets(str, sizeof(str), file))
    {   
        //Skip=FALSE;
        str[sizeof(str)-1]='\0';
        if((str[0]=='#') || (str[0]=='\n')) //remove preprocessor directives and blank lines
            goto gotoNextLine;

        if(str[0]=='/')
        {
            if(str[1]=='/')
                goto gotoNextLine;
            else if(str[1]=='*')
            {
                isComment=TRUE;
                goto gotoNextLine;
            }
        }


        if(DEBUG_MODE)      
            printf("\n\n**isComment=%d**\n\n",isComment);

        if(isComment)       // skip block comments
        {   
            c=0;
            while(str[c]!='\0')
            {
                if(str[c]=='*')
                {
                    if(str[c+1]=='/')
                    {
                        isComment=FALSE;
                        for (i=c+2; str[i]!='\0'; i++) 
                            tmp[ch++] = str[i];
                        strcpy(str,tmp);
                        break;
                    }

                }
                if(DEBUG_MODE)
                    printf("\n\n**The comment loop**\n\n");
                c++;
            }
        }

        if(!isComment)
        {
            for (token = strtok(str, delimit); token != NULL; token = strtok(NULL, delimit)) 
            {   
                isKeyword=0;
                isOperator=0;

                if(DEBUG_MODE)      
                    printf("\n\n**token=%s**\n\n",token);

                for (i=0;i<OPERATORS;i++)
                {
                    if(!strcmp(trimString(token),operators[i]))
                    {
                        isOperator=1;
                        operator[l++]=token;
                        break;//goto nextToken;
                    }
                }

                if(DEBUG_MODE)      
                    printf("\n\n**isOperator=%d**\n\n",isOperator);

                if(!isOperator)
                {
                    for (i=0;i<KEYWORDS;i++)
                    {
                        if(!strcasecmp(trimString(token),key[i]))
                        {
                            isKeyword=1;
                            keyword[j++]=token;
                            break;//goto nextToken;
                        }
                    }
                }


                if(DEBUG_MODE)      
                    printf("\n\n**isKeyword=%d**\n\n",isKeyword);


                if(!isKeyword && !isOperator)           
                {
                    i=ch=state=0;
                    if(token[0]=='"')
                    {
                        for (i=1; str[i]!='"'; i++) 
                            tmp[ch++] = token[i];
                        strcpy(token,tmp);
                        literal[m++]=token;
                        goto nextToken;
                    }
                    while(token[i]!='\0')
                    {
                        if((token[0]=='#') || (token[0]=='\n'))
                            goto nextToken;
                        switch(state)
                        {
                            case 0: if(isalpha(token[i]) || token[i]=='_')
                                {
                                    state=1;
                                    i++;
                                }
                                else
                                    state=2;
                                break;
                            case 1: if(isalnum(token[i]) || token[i]=='_')  
                                {
                                    state=1;
                                    i++;
                                }
                                else
                                    state=2;
                                break;
                            case 2: goto nextToken;
                        }

                    }

                    if(state==1)
                    {
                        identifier[k++]=token;
                        if(DEBUG_MODE)      
                            printf("\n\n**Identifier_True=%s**\n\n",token);

                    }
                }

                nextToken: ;
            }
        }
        gotoNextLine: ;
    }

    if(DEBUG_MODE)      
        printf("\n\n**Keywords_Count=%d**\n\n",j);

    if(j)
    {
        printf("\nKeywords: \n");
        for(i=0;i<j;i++)
        {
            printf("%s",keyword[i]);
            if((i+1)<j)
                printf(", ");   
        }
        printf("\n");
    }

    if(DEBUG_MODE)      
        printf("\n\n**Operators_Count=%d**\n\n",l);

    if(l)
    {
        printf("\nOperators: \n");
        for(i=0;i<l;i++)
        {
            printf("%s",operator[i]);
            if((i+1)<l)
                printf(", ");
        }
        printf("\n");
    }

    if(DEBUG_MODE)      
        printf("\n\n**Identifiers_Count=%d**\n\n",k);

    if(k)
    {
        printf("\nIdentifiers: \n");
        for(i=0;i<k;i++)
        {
            printf("%s",identifier[i]);
            if((i+1)<k)
                printf(", ");   
        }
    }


    printf("\n");
    fclose(file);

}

0 个答案:

没有答案