Question

我正在尝试做作业。分配是生成中间代码。为此，我和Lex一起运行以下Yacc程序。但是，它给了我一个分段错误。为什么会出现分段错误？这是代码。

%{
#include<stdio.h>
#include<malloc.h>
#include<string.h>
#include<ctype.h>

char datatype[5];
char temporaryVariable[5];
int assignment=0;
int tempvarnum=0;

struct sym
{
    char datatype[5];
    char varname[5];
    int size,location;
    struct sym *next;
}*first;

struct quadruple
{
    char *src1;
    char *src2;
    char *op;
    char *tmp;
    struct quadruple *next;
}*qfirst;

char* typeOf(char *lab);


void yyerror(const char *st)
{}
%}

%left '+' '-'
%left '*' '/'
%right '^'
%union 
{
    struct ICG
    {
        char *lab; 
        char code[100]; 
        char datatype[5];
    }Icg;
}
%token <Icg> ID
%token INT FLOAT CHAR
%type <Icg> E

%%
S: S Decl
 | Decl 
 | S Assn
 | Assn
 ;

Decl:Type List ';' {printf("Read declaration list");}
    ;

List:List ',' ID {printf("created node\n");createNode($3.lab,datatype);}
    | ID {printf("created node\n");createNode($1.lab,datatype);}
    ;

Type: INT {strcpy(datatype,"int");}
    | FLOAT {strcpy(datatype,"float");}
    | CHAR {strcpy(datatype,"char");}
    ;

Assn:ID '=' E ';' {printf("Assignment statement");assignment=0;}
    ;


E: E '+' E {printf("Entering code");code(&$$,&$1,&$3,'+');}
 | E '-' E {code(&$$,&$1,&$3,'-');}
 | E '*' E {code(&$$,&$1,&$3,'*');}
 | E '/' E {code(&$$,&$1,&$3,'/');}
 | ID {printf("ID");strcpy($$.code,$1.lab); strcpy($$.lab,$1.lab);strcpy($$.datatype,typeOf($1.lab));}
 ;
%%

void code(struct ICG* one, struct ICG* two, struct ICG* three, char *operator)
{
    printf("In code");
    char tempvarname[5];
    char code[100];
    sprintf(tempvarname,"t%d=",tempvarnum++);
    strcpy(one->lab,tempvarname);
    strcpy(one->lab,two->lab);
    createNode(one->lab,one->datatype);
    strcpy(code,tempvarname);
    strcat(code,two->lab);
    strcat(code,three->lab);
    strcat(code,operator);
    strcat(code,"\n");

    if(assignment==0)
    {
        strcpy(one->code,code);
        assignment=1;
    }
    else
    {
        strcat(one->code,code);
    }
    createQuadruples(two->lab,three->lab,operator,one->lab);
}

void createQuadruples(char *lab2,char*lab3,char *operator,char*lab1)
{
    struct quadruple* next=qfirst;
    if(!next)
    {
        struct quadruple* new=(struct quadruple*)malloc(sizeof(struct quadruple));
        strcpy(new->src1,lab2);
        strcpy(new->src2,lab3);
        strcpy(new->op,operator);
        strcpy(new->tmp,lab1);
        new->next=NULL;
        qfirst=new;
    }
    else
    {
        while(next->next)
        {
            next=next->next;
        }
        struct quadruple* new=(struct quadruple*)malloc(sizeof(struct quadruple));
        strcpy(new->src1,lab2);
        strcpy(new->src2,lab3);
        strcpy(new->op,operator);
        strcpy(new->tmp,lab1);
        new->next=NULL;
        next->next=new;
    }
}

void displayCode()
{
    struct quadruple* temp=qfirst;
    int i=0;
    printf("\t| %s | Label | size | location |\n","Index");
    while(temp)
    {
        printf("\t|%7d|%7s|%6s|%7s|%8s|\n",i++,temp->src1,temp->op,temp->src2,temp->tmp);
        temp=temp->next;
    }   
}

char* typeOf(char *lab)
{
    struct sym *new=first;
    while(new)
    {
        if(strcmp(new->varname,lab)==0)
        {
            return new->datatype;
        }
        new=new->next;
    }
}




void createNode(char *name, char *type)
{
    struct sym* new=first;
    int size=0;
    if(strcmp(type,"char")==0)
        size=1;
    if(strcmp(type,"float")==0)
        size=4;
    if(strcmp(type,"int")==0)
        size=2;

    if(!new)
    {
        struct sym* next=(struct sym*)malloc(sizeof(struct sym));
        strcpy(next->datatype,type);
        strcpy(next->varname,name);
        next->size=size;
        next->location=0;
        next->next=NULL;
        first=next;
    }
    else
    {
        while(new->next)
        {
            new=new->next;
        }
        struct sym* next=(struct sym*)malloc(sizeof(struct sym));
        strcpy(next->datatype,type);
        strcpy(next->varname,name);
        next->size=size;
        next->location=new->location+new->size;
        next->next=NULL;
        new->next=next;
    }
}
int main()
{
    yyparse();
    printf("In main");
    displayCode();
}

相应的lex文件是：

%{
#include<stdio.h>
#include "y.tab.h"
%}

letter [a-zA-Z]
digit [0-9]

%%
"int" {return INT;}
"float" {return FLOAT;}
"char" {return CHAR;}
"+"|"-"|"*"|"/"|"="|","|";" {return yytext[0];}
{letter}({letter}|{digit})* {yylval.Icg.lab=yytext;return ID;}
%%

我确实尝试调试程序，但它只是让我无处可去。而且我甚至不知道如何开始调试它。我确实尝试过printf语句，但我发现它们并没有多大帮助。

我唯一知道的是它正在检测标识符。

编辑：
我尝试在这个程序上使用valgrind。据说在strcpy源和目标指向同一地址。这怎么可能？

Answer 1

ID的词法分析器规则正在返回指向临时令牌缓冲区（yytext）的指针，该缓冲区将在下次调用yylex时被覆盖/更改/损坏，以便读取下一个标记。尝试：

{letter}({letter}|{digit})* {yylval.Icg.lab=strdup(yytext);return ID;}

代替。

在code函数（以及其他地方）中，您将one->lab与strcpy一起复制到内存中，而不会将指针初始化为指向任何位置。即时内存损坏。

更详细地说 - 您使用code作为第一个参数调用&$$。在调用$$之前未设置code，因此它可能包含在最后一次操作运行后遗留在其中的任何内容，但可能包含任何垃圾，因此这实际上是指向未初始化的回收内存的指针。然后在code中，调用strcpy(one->lab, ..从$$.lab读取随机垃圾指针并尝试复制到其中。

您将大量内容复制到固定大小的100字节字符缓冲区中，而无需检查溢出。溢出缓冲区将导致损坏。

Answer 2

在您使用的各个地方：

char datatype[5];

然后像这样复制到它：

Type: INT {strcpy(datatype,"int");}
    | FLOAT {strcpy(datatype,"float");}
    | CHAR {strcpy(datatype,"char");}

strcpy将复制字符串加上额外的“null”。零表示字符串的结尾。复制“float”时，最后的null会覆盖下一步。您需要保留至少6个字符。

Yacc / Lex使用此代码给出了分段错误

2 个答案: