我正在尝试为简化的java语言运行我的自制编译器,但是我总是遇到分段错误。 目标是构建一个抽象语法树。我也尝试用valgrind进行调试,但看起来这个工具在我的电脑上有一些关于bison和flex的问题。请帮帮我。
Lexfile:
%{
#include <stdio.h>
#include "abst.h"
#include "yacc.tab.h"
int yylineno;
int column = 0;
int nesting_comments = 0;
int max_comment_level = 0;
int total_comments_number = 0;
void count() {
int i;
for(i = 0; yytext[i] != '\0'; i++){
if(yytext[i] == '\n')
column = 0;
else if(yytext[i] == '\t')
column += 8 - (column % 8);
else
column++;
}
//printf("%s", yytext); Uncomment this Line for printing the input stream
}
void yyerror(char const *s);
%}
DIGIT [0-9]
LETTER [a-zA-Z]
WHITESPACE [ \t\r]
VINT {DIGIT}+
VFLOAT {VINT}+"."{DIGIT}*
VDOUBLE {VFLOAT}E{VINT}
VSTRING \".*\"
COMMENT1 "//".*
%x COMMENT2
COMMENT3 "/""*"([^{"*""/"}])*"*""/"
REL_OP "<"|"<="|"=="|"=>"|">"|"&&"|"||"|"&"|"|"|"!="
ADD_OP "+"|"-"|"%"
MUL_OP "*"|"/"
UNARY_OP "++"|"--"
ASSIGN_OP "+="|"-="|"*="|"/="|"%="
TYPE "boolean"|"int"|"float"|"double"
ID {LETTER}({LETTER}|{DIGIT})*
%%
"if" { count(); return IF; }
"else" { count(); return ELSE; }
"while" { count(); return WHILE; }
"for" { count(); return FOR; }
"read" { count(); return READ; }
"write" { count(); return WRITE; }
"final" { count(); return FINAL; }
{VINT} { count(); yylval.ival = atoi(yytext); return IVALUE; }
{VFLOAT}|{VDOUBLE} { count(); yylval.fval = atof(yytext); return FVALUE; }
{VSTRING} { count(); yylval.identifier = strdup(yytext); return STRING; }
"true" { count(); return TRUE; }
"false" { count(); return FALSE; }
"boolean" { count(); return BOOLEAN; }
"int" { count(); return INT; }
"float" { count(); return FLOAT; }
"double" { count(); return DOUBLE; }
{ID} { count(); yylval.identifier = strdup(yytext); return IDENT; }
{REL_OP} { count(); yylval.ival = getEnum(yytext); return RELOP; }
{ADD_OP} { count(); yylval.ival = getEnum(yytext); return ADDOP; }
{MUL_OP} { count(); yylval.ival = getEnum(yytext); return MULOP; }
{UNARY_OP} { count(); yylval.ival = getEnum(yytext); return UNARYOP; }
"=" { count(); return '='; }
"!" { count(); return '!'; }
"(" { count(); return '('; }
")" { count(); return ')'; }
"[" { count(); return '['; }
"]" { count(); return ']'; }
"{" { count(); return '{'; }
"}" { count(); return '}'; }
"," { count(); return ','; }
";" { count(); return ';'; }
{COMMENT1} {
total_comments_number++;
count();
}
"(*" {
total_comments_number++;
BEGIN(COMMENT2);
printf("(* [BEGIN NESTED %d]\n", nesting_comments++);
count();
}
<COMMENT2>[^*()]* {
printf("%s", yytext); /* Eat other characters */
count();
}
<COMMENT2>"(*" {
printf("(* [BEGIN NESTED %d]\n", nesting_comments);
if(++nesting_comments > max_comment_level) {
max_comment_level = nesting_comments;
}
count();
}
<COMMENT2>"*)" {
printf("\n*) [END NESTED %d]", --nesting_comments);
if (nesting_comments == 0){
BEGIN(INITIAL);
}
count();
}
<COMMENT2>[*()] {
printf("%s", yytext); /* Eat the other characters if it is not a comment delimiter */
count();
}
{COMMENT3} {
total_comments_number++;
count();
}
{WHITESPACE}+ { count(); }
\n { count(); yylineno++; }
. { yyerror("Lex"); }
%%
Yaccfile:
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "abst.h"
extern char *yytext;
extern int yylineno;
extern FILE *yyin;
extern int column;
extern int nesting_comments;
extern int max_comment_level;
extern int total_comments_number;
int yylex(void);
//int yydebug=1;
node *root;
void yyerror (char const *s){
fprintf (stderr, "%s Error at line %d:%d\n", s, yylineno, column);
fprintf (stderr, "Text was: %s\n", yytext);
exit(EXIT_FAILURE);
}
%}
%union {
char *identifier;
int ival;
float fval;
struct _node *node;
}
%type <identifier> IDENT STRING
%type <ival> IVALUE MULOP RELOP ADDOP UNARYOP
%type <fval> FVALUE
%type <node> program compStmt varDecList varListType type varList ident identArray stmtList
%type <node> statement assignStmt ifStmt elsePart whileStmt forStmt ioStmt ioCall paramList
%type <node> expr simpleExpr term factor mulop addop relop
%token IF ELSE WHILE FOR WRITE READ FINAL
%token IVALUE FVALUE TRUE FALSE STRING
%token BOOLEAN INT FLOAT DOUBLE
%token IDENT
%right UNARYOP '!' '='
%left MULOP ADDOP RELOP
%left '[' ']' '(' ')'
%start program
%%
program
: compStmt { $$ = $1; root = $1;}
;
compStmt
: '{' varDecList stmtList '}' { $$ = new_node(COMP_STMT, (node*[]){ $2, $3, NULL });}
;
varDecList
: varListType varDecList { $$ = new_node(DECLARATION, (node*[]){ $1, $2, NULL }); }
|
;
varListType
: type varList { $$ = new_node(VAR_LIST_TYPE, (node*[]){ $1, $2, NULL }); }
| FINAL type varList { $$ = new_node(VAR_LIST_TYPE, (node*[]){ new_string_node(TYPE, "final") , $2, $3, NULL }); }
;
type
: BOOLEAN { $$ = new_string_node(TYPE, "boolean"); }
| INT { $$ = new_string_node(TYPE, "int"); }
| FLOAT { $$ = new_string_node(TYPE, "float"); }
| DOUBLE { $$ = new_string_node(TYPE, "double"); }
;
varList
: ident ',' varList { $$ = new_node(VAR_LIST, (node*[]){ $1, $3, NULL }); }
| ident ';' { $$ = new_node(VAR_LIST, (node*[]){ $1, NULL }); }
| ident '=' expr ';' { $$ = new_node(VAR_LIST, (node*[]){ $1, $3, NULL }); }
;
ident
: IDENT { $$ = new_string_node(VAR, $1); }
| identArray '[' simpleExpr ']' { $$ = new_node(ARRAY, (node*[]){ $1, $3, NULL }); }
;
identArray
: IDENT { $$ = new_string_node(VAR, $1); }
;
stmtList
: statement stmtList { $$ = new_node(STATEMENT_LIST, (node*[]){ $1, $2, NULL }); }
| statement { $$ = new_node(STATEMENT_LIST, (node*[]){ $1, NULL }); }
;
statement
: assignStmt ';' { $$ = $1; }
| compStmt { $$ = $1; }
| ifStmt { $$ = $1; }
| whileStmt { $$ = $1; }
| forStmt { $$ = $1; }
| ioStmt { $$ = $1; }
;
assignStmt
: ident '=' expr { $$ = new_node(ASSIGN_STMT, (node*[]){ $1, $3, NULL }); }
| ident UNARYOP { $$ = new_node(ASSIGN_STMT, (node*[]){ $1, new_int_node(OP, $2), NULL }); }
;
ifStmt
: IF '(' expr ')' statement elsePart {
if($6->type != NONE){
$$ = new_node(IF_STMT, (node*[]){ $3, $5, $6, NULL });
}
else {
$$ = new_node(IF_STMT, (node*[]){ $3, $5, NULL });
}
}
;
elsePart
: ELSE statement { $$ = $2; }
| { $$ = new_none(); }
;
whileStmt
: WHILE '(' expr ')' statement { $$ = new_node(WHILE_STMT, (node*[]){ $3, $5, NULL }); }
;
forStmt
: FOR '(' assignStmt ';' expr ';' assignStmt ')' statement {
$$ = new_node(FOR_STMT, (node*[]){ $3, $5, $7, $9, NULL });
}
;
ioStmt
: ioCall '(' paramList ')' ';' { $$ = new_node(IOFUNC, (node*[]){ $1, $3, NULL }); }
;
ioCall
: READ { $$ = new_int_node(IOFUNCCALL, 0); }
| WRITE { $$ = new_int_node(IOFUNCCALL, 1); }
;
paramList
: ident ',' paramList { $$ = new_node(PARAM_LIST, (node*[]){ $1, $3, NULL }); }
| STRING ',' paramList { $$ = new_node(PARAM_LIST, (node*[]){ new_string_node(STRING_CONST, $1), $3, NULL }); }
| ident { $$ = new_node(PARAM_LIST, (node*[]){ $1, NULL }); }
| STRING { $$ = new_string_node(STRING_CONST, $1); }
;
expr
: simpleExpr relop simpleExpr { $$ = new_node(EXPR, (node*[]){ $1, $2, $3, NULL }); }
| simpleExpr { $$ = new_node(EXPR, (node*[]){ $1, NULL }); }
;
simpleExpr
: term addop simpleExpr { $$ = new_node(EXPR, (node*[]){ $1, $2 ,$3, NULL }); }
| term { $$ = new_node(EXPR, (node*[]){ $1, NULL }); }
;
term
: factor mulop term { $$ = new_node(EXPR, (node*[]){ $1, $2, $3, NULL }); }
| factor { $$ = new_node(EXPR, (node*[]){ $1, NULL }); }
;
factor
: IVALUE { $$ = new_int_node(INT_CONST, $1); }
| FVALUE { $$ = new_float_node(REAL_CONST, $1); }
| TRUE { $$ = new_int_node(BOOL_CONST, 1); }
| FALSE { $$ = new_int_node(BOOL_CONST, 0); }
| ident { $$ = $1; }
| '!' factor { $$ = new_node(EXPR, (node*[]){ $2, NULL }); }
| '-' factor { $$ = new_node(EXPR, (node*[]){ $2, NULL }); }
| '(' expr ')' { $$ = new_node(EXPR, (node*[]){ $2, NULL }); }
;
mulop
: MULOP { $$ = new_int_node(OP, $1); }
;
addop
: ADDOP { $$ = new_int_node(OP, $1); }
;
relop
: RELOP { $$ = new_int_node(OP, $1); }
;
%%
int main(int argc, char **argv) {
if ( argc > 1 )
yyin = fopen( argv[1], "r" );
else
yyin = stdin;
yyparse();
printf("\nCOMMENT-ANALYSIS:\n");
printf("Total number of comments: %d\n", total_comments_number);
printf("Maximum nested comment level: %d\n", max_comment_level);
printf("\n");
printNode(root);
return 0;
}
我在其中删除了一些结构和枚举的标题:
#ifndef AST_TYPES_H
#define AST_TYPES_H
typedef enum {
COMP_STMT = 0,
ASSIGN,
ASSIGN_STMT,
IF_STMT,
FOR_STMT,
WHILE_STMT,
STATEMENT,
STATEMENT_LIST,
CONST,
VAR,
ARRAY,
VAR_LIST,
TYPE,
EXPR,
SIMPLE_EXPR,
INT_CONST,
REAL_CONST,
BOOL_CONST,
STRING_CONST,
IDENTIFIER,
OP,
IOFUNC,
IOFUNCCALL,
PARAM_LIST,
DECLARATION,
VAR_LIST_TYPE,
NONE,
} node_type;
typedef enum {
PLUS = 0,
MINUS,
MUL,
DIV,
MOD,
LT,
LE,
GT,
GE,
EQ,
NE,
AND,
OR,
PLUSPLUS,
MINUSMINUS,
NOT,
} operator;
typedef union _node_value{
int iValue; /*integer, true, false, compOp, addOp, mulOp */
float fValue; /*number*/
char* identifier; /*identifier*/
/* list of BNF right-hand side symbols of nonterminal type */
struct _node **body;
} node_value;
typedef struct _node {
node_type type;
int size;
node_value value;
} node;
void printNode(node *the_node);
operator getEnum(char *value);
char *printEnum(node_type type);
node* new_none();
node* new_int_node(node_type type, int value);
node* new_float_node(node_type type, float value);
node* new_string_node(node_type type, char *value);
node* new_node(node_type type, node **nodes);
#endif //AST_TYPES_H
实现headerfunctions的c文件:
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "abst.h"
node* new_none(){
node *the_node = (node*) malloc(sizeof(node));
if (the_node == NULL) {
fprintf(stderr, "could not allocated memory for node\n");
exit(EXIT_FAILURE);
}
the_node->type = NONE;
the_node->size = 0;
return the_node;
}
operator getEnum(char *value){
switch (value[0]) {
case '+':
if(value[1] == '+')
return PLUSPLUS;
else
return PLUS;
case '-':
if(value[1] == '-')
return MINUSMINUS;
else
return MINUS;
case '*':
return MUL;
case '/':
return DIV;
case '%':
return MOD;
case '<':
if(value[1] == '=')
return LE;
else
return LT;
case '>':
if(value[1] == '=')
return GE;
else
return GT;
case '=':
if(value[1] == '!')
return NE;
else
return EQ;
case '&':
return AND;
case '|':
return OR;
case '!':
return NOT;
}
return -1;
}
node* new_int_node(node_type type, int value){
node *the_node = (node*) malloc(sizeof(node));
if (the_node == NULL) {
fprintf(stderr, "could not allocated memory for node\n");
exit(EXIT_FAILURE);
}
the_node->type = type;
the_node->size = 0;
switch(type){
case INT_CONST:
case OP:
case BOOL_CONST:
case IOFUNCCALL:
the_node->value.iValue = value;
break;
default:
fprintf (stderr, "Node-Type %s is wrong for int value %d\n", printEnum(type), value);
}
return the_node;
}
node* new_float_node(node_type type, float value){
node *the_node = (node*) malloc(sizeof(node));
if (the_node == NULL) {
fprintf(stderr, "could not allocated memory for node\n");
exit(EXIT_FAILURE);
}
the_node->type = type;
the_node->size = 0;
if(type == REAL_CONST)
the_node->value.fValue = value;
else
fprintf (stderr, "Node-Type %s is wrong for float value %f\n", printEnum(type), value);
return the_node;
}
node* new_string_node(node_type type, char* value){
node *the_node = (node*) malloc(sizeof(node));
if (the_node == NULL) {
fprintf(stderr, "could not allocated memory for node\n");
exit(EXIT_FAILURE);
}
the_node->type = type;
the_node->size = 0;
switch(type){
case VAR:
case STRING_CONST:
case TYPE:
the_node->value.identifier = value;
break;
default:
fprintf (stderr, "Node-Type %s is wrong for String %s\n", printEnum(type), value);
}
return the_node;
}
node* new_node(node_type type, node **nodes){
int counter = 0;
while(nodes[counter] != NULL){
counter++;
if(counter > 4){
fprintf (stderr, "Could not find NULL for %s\n", printEnum(type));
exit(EXIT_FAILURE);
}
}
node *the_node = (node*) malloc(sizeof(node));
the_node->type = type;
/*
switch(type){
case STATEMENT_LIST:
case DECLARATION:
case VAR_LIST:
if(counter == 2 && nodes[1]->type != EXPR){
int size = nodes[1]->size + 1;
the_node->size = size;
the_node->value.body = malloc(size*sizeof(node*));
the_node->value.body[0] = nodes[0];
for (int i = 1; i < size; i++) {
the_node->value.body[i] = nodes[1]->value.body[i-1];
}
return the_node;
}
}
*/
the_node->value.body = (node**) malloc(counter*sizeof(node*));
the_node->size = counter;
for (int i = 0; i < counter; i++) {
the_node->value.body[i] = nodes[i];
}
return the_node;
}
void printNode(node *the_node){
if(the_node->type > NONE){
fprintf (stderr, "Wrong Type %d\n", the_node->type);
return;
}
printEnum(the_node->type);
switch(the_node->type){
case INT_CONST:
printf("%s with INT-value: %d\n", printEnum(the_node->type), the_node->value.iValue);
break;
case REAL_CONST:
printf("%s with FLOAT-value: %f\n", printEnum(the_node->type), the_node->value.fValue);
break;
case BOOL_CONST:
printf("%s with BOOL-value: %d\n", printEnum(the_node->type), the_node->value.iValue);
break;
case VAR:
printf("%s with Identifier: %s\n", printEnum(the_node->type), the_node->value.identifier);
break;
case OP:
printf("%s with Operation: %d\n", printEnum(the_node->type), the_node->value.iValue);
break;
default:
printf("%s with size %d\n", printEnum(the_node->type), the_node->size);
for (int i = 0; i < the_node->size; i++) {
printNode(the_node->value.body[i]);
}
}
}
char *printEnum(node_type type){
switch (type) {
case COMP_STMT:
return "COMP_STMT";
case ASSIGN:
return "ASSIGN";
case ASSIGN_STMT:
return "ASSIGN_STMT";
case IF_STMT:
return "IF_STMT";
case FOR_STMT:
return "FOR_STMT";
case WHILE_STMT:
return "WHILE_STMT";
case STATEMENT:
return "STATEMENT";
case STATEMENT_LIST:
return "STATEMENT_LIST";
case CONST:
return "CONST";
case VAR:
return "VAR";
case ARRAY:
return "ARRAY";
case VAR_LIST:
return "VAR_LIST";
case TYPE:
return "TYPE";
case EXPR:
return "EXPR";
case INT_CONST:
return "INT_CONST";
case REAL_CONST:
return "REAL_CONST";
case BOOL_CONST:
return "BOOL_CONST";
case STRING_CONST:
return "STRING_CONST";
case IDENTIFIER:
return "IDENTIFIER";
case OP:
return "OP";
case IOFUNC:
return "IOFUNC";
case IOFUNCCALL:
return "IOFUNCCALL";
case PARAM_LIST:
return "PARAM_LIST";
case DECLARATION:
return "DECLARATION";
case VAR_LIST_TYPE:
return "VAR_LIST_TYPE";
case NONE:
return "NONE";
default:
printf("\nUNDEFINED: %d\n", type);
return "UNDEFINED";
}
}
我编译这样的程序:
bison -d --debug --verbose yacc.y
flex lex.l
gcc yacc.tab.c lex.yy.c abst.c -std=c11 -Wall -o run -ll -g
./run < sample.java
这是java语法的示例文件,应编译:
/* This program that implements a few well-known algorithms */
{ int x, y, z;
final int n = 10;
int a[n];
// greatest common divisor
{ read(x, y);
while(x * y != 0)
if(x > y)
x = x - y;
else y = y - x;
if(x == 0)
write("gcd = ", y);
else write("gcd = ", x);
}
// factorial calculation
{ read(x);
y = 1;
for(z = 2; z <= x; z++)
y = y * z;
write(x, " != ", y);
}
// prime number verification
{ int k;
boolean prime;
read(x);
k = 2;
prime = true;
while((k < x/2) && prime) {
if(x % k == 0)
prime = false;
k++;
}
write(x, " is prime is ", prime);
}
// maximum array element
{ int i, max;
for(i = 0; i < n; i++)
read(a[i]);
max = a[0];
for(i = 0; i < n; i++)
if(max > a[i])
max = a[i];
write("max = ", max);
}
// bubble sort
{ boolean k;
float r;
int i;
k = true;
while(k) {
k = false;
for(i = 0; i < n-1; i++)
if(a[i] > a[i+1]) {
b = a[i];
a[i] = a[i+1];
a[i+1] = b;
k = true;
}
}
write("sorted array: ");
for(i = 0; i < n; i++)
write(a[i]);
}
}
答案 0 :(得分:4)
不要直接从命令行运行程序,而是执行:
gdb run
然后在(gdb)
提示符处
run < sample.java
将产生以类似:
结尾的输出Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7a66c36 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
(gdb)
告诉您问题是传递给printf
的问题。从这里开始,使用bt
命令获取堆栈回溯:
#0 0x00007ffff7a66c36 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
#1 0x00007ffff7a6f299 in printf () from /lib/x86_64-linux-gnu/libc.so.6
#2 0x0000000000404b14 in printNode (the_node=0x60c870) at abst.c:178
#3 0x0000000000404b94 in printNode (the_node=0x60c930) at abst.c:186
#4 0x0000000000404b94 in printNode (the_node=0x60c970) at abst.c:186
#5 0x0000000000404b94 in printNode (the_node=0x60ca70) at abst.c:186
#6 0x0000000000404b94 in printNode (the_node=0x60d190) at abst.c:186
#7 0x0000000000404b94 in printNode (the_node=0x60d6d0) at abst.c:186
#8 0x0000000000404b94 in printNode (the_node=0x60d710) at abst.c:186
#9 0x0000000000404b94 in printNode (the_node=0x60d750) at abst.c:186
#10 0x0000000000404b94 in printNode (the_node=0x613500) at abst.c:186
#11 0x0000000000404b94 in printNode (the_node=0x613540) at abst.c:186
#12 0x00000000004024a7 in main (argc=1, argv=0x7fffffffe0a8) at yacc.y:208
看看你是如何进入这种状态的。现在使用up
命令两次(在堆栈跟踪中向上移动两帧 - 在上面的列表中向下),然后你在:
#2 0x0000000000404b14 in printNode (the_node=0x60c870) at abst.c:178
178 printf("%s with Identifier: %s\n", printEnum(the_node->type), the_node->value.identifier);
从这里您可以使用p
命令打印事物的值:
(gdb) p the_node->value
$1 = {
iValue = 2,
fValue = 2.80259693e-45,
identifier = 0x2 <error: Cannot access memory at address 0x2>,
body = 0x2
}
它告诉你(直接)问题是什么 - 当你实际拥有可能是identifier
的内容时,你正在从联合访问iValue
,所以你要传递一个垃圾指针导致崩溃的printf
。
有关gdb命令的更多信息,请尝试help
命令...
答案 1 :(得分:3)
这真的是很多代码,对于简单的视觉检查来说太多了。您应该尝试使用调试器执行程序,启用bison跟踪和/或编写一些测试代码以验证您的节点构建函数是否按预期工作。 (我和三个人一起去。)
跳出来的一件事是你有很多这样的规则:
BOOLEAN { $$ = new_node(TYPE, (node*[]){ $1, NULL }); }
假设存在与表示简单关键字的终端令牌相关联的一些语义值。但是,flex规则返回关键字令牌类型
"boolean" { count(); return BOOLEAN; }
未设置yylval
。实际上,没有flex规则设置yylval ,所以它总是为NULL。 (它是一个全局变量,因此它将被零初始化。)
因此,如果用于构建上述bison规则中的新node*
节点的type
被取消引用,则会出现段错误。我没有通过代码查看它是否被解除引用,因此可能会或可能不会解释您的问题。
答案 2 :(得分:3)
您不能在.y文件中的任何位置依赖yytext
。由于 yacc 是一个LALR(1)解析器,它已经消失了。您需要将它复制到词法分析器中的一段动态内存中,并将其分配给yylval
联合的成员:
yylval.text = strdup(yytext);
在编写C ++时,不应使用节点类型变量。您应该创建一个继承自node
的节点类的系列,并使用构造函数和虚方法设置它们,以便在每种特定情况下执行它们需要执行的操作。