我正在使用 Ian Piumarta 的 peg/leg 工具,为我正在开发的一门玩具语言生成解析器。不幸的是,某些输入会使解析器崩溃,Clang 的 AddressSanitizer 报告原因是堆缓冲区溢出。下面是我得到的输出以及相关的代码行:
==27761== ERROR: AddressSanitizer: heap-buffer-overflow on address 0x7f051b06c398 at pc 0x419e18 bp 0x7fffd2223780 sp 0x7fffd2223778
WRITE of size 8 at 0x7f051b06c398 thread T0
#0 0x419e17 in yySet grammar.c:257
#1 0x41767b in yyDone grammar.c:224
#2 0x41745e in yyparsefrom grammar.c:2061
#3 0x418dec in xr_parse_dump_from_stdin grammar.c:2129
#4 0x42a6d9 in main parse_dump.c:19
#5 0x7f051b09652c in ?? ??:0
0x7f051b06c398 is located 88 bytes to the right of 256-byte region [0x7f051b06c240,0x7f051b06c340)
allocated by thread T0 here:
#0 0x42f4e0 in malloc ??:0
#1 0x417234 in yyparsefrom grammar.c:2054
#2 0x418dec in xr_parse_dump_from_stdin grammar.c:2129
#3 0x42a6d9 in main parse_dump.c:19
#4 0x7f051b09652c in ?? ??:0
Shadow byte and word:
0x1fe0a360d873: fa
0x1fe0a360d870: fa fa fa fa fa fa fa fa
More shadow bytes:
0x1fe0a360d850: 00 00 00 00 00 00 00 00
0x1fe0a360d858: 00 00 00 00 00 00 00 00
0x1fe0a360d860: 00 00 00 00 00 00 00 00
0x1fe0a360d868: fa fa fa fa fa fa fa fa
=>0x1fe0a360d870: fa fa fa fa fa fa fa fa
0x1fe0a360d878: fa fa fa fa fa fa fa fa
0x1fe0a360d880: fa fa fa fa fa fa fa fa
0x1fe0a360d888: fa fa fa fa fa fa fa fa
0x1fe0a360d890: fa fa fa fa fa fa fa fa
Stats: 0M malloced (0M for red zones) by 136 calls
Stats: 0M realloced by 11 calls
Stats: 0M freed by 15 calls
Stats: 0M really freed by 0 calls
Stats: 3M (898 full pages) mmaped in 7 calls
mmaps by size class: 7:4095; 8:2047; 9:1023; 10:511; 11:255; 12:128; 13:64;
mallocs by size class: 7:112; 8:2; 9:3; 10:14; 11:3; 12:1; 13:1;
frees by size class: 7:8; 8:2; 9:2; 10:1; 11:1; 12:1;
rfrees by size class:
Stats: malloc large: 0 small slow: 7
==27761== ABORTING
下面是 grammar.c(由我的 grammar.leg 规范生成)中分配这块内存的地方:
yyctx->valslen= 32;
yyctx->vals= (YYSTYPE *)malloc(sizeof(YYSTYPE) * yyctx->valslen);
然后在这里访问:
/* Store the current semantic value (ctx->yy) into the slot at index `count`
 * relative to ctx->val.  No bounds check is made here; `text` is unused. */
YY_LOCAL(void) yySet(yycontext *ctx, char *text, int count)
{
    ctx->val[count] = ctx->yy;
}
在解析开始之前,yyctx->val 似乎被设置为指向 yyctx->vals。
我原以为这意味着我访问了某条规则产生的解析器变量,而该变量没有被正确定义之类的,但我在语法中找不到任何这样的情况(不可否认,这份语法写得有些草率,几乎肯定还有许多其他问题)。希望有人至少能给我指个正确的方向。谢谢!
编辑:为了回应 Dayal rai 的问题,并希望能提供更多有用的信息……如果我在通常发生溢出的位置使用下面这段代码:
printf("COUNT %d w/ TEXT: %s\n", count, text);
fflush(stdout);
ctx->val[count]= ctx->yy;
和这个解析器输入:
#!./parse_dump
object $root;
method foo()
{
var i = 0;
while (i < 10) {
i = i + 1
}
}
我在缓冲区溢出之前得到了这个输出:
COUNT -2 w/ TEXT: root
COUNT -3 w/ TEXT: foo
COUNT -2 w/ TEXT: foo
COUNT -5 w/ TEXT: i
COUNT -2 w/ TEXT: 0
COUNT -1 w/ TEXT: 0
COUNT -2 w/ TEXT: 0
COUNT -2 w/ TEXT: 0
COUNT -3 w/ TEXT: 0
COUNT -2 w/ TEXT:
COUNT -2 w/ TEXT:
COUNT -6 w/ TEXT:
COUNT -2 w/ TEXT:
COUNT -2 w/ TEXT: i
COUNT -1 w/ TEXT: i
COUNT -2 w/ TEXT: i
COUNT -2 w/ TEXT: i
COUNT -3 w/ TEXT: i
COUNT -2 w/ TEXT:
COUNT -2 w/ TEXT: 10
COUNT -1 w/ TEXT: 10
COUNT -2 w/ TEXT: 10
COUNT -2 w/ TEXT: 10
COUNT -3 w/ TEXT: 10
COUNT -1 w/ TEXT:
COUNT -2 w/ TEXT:
COUNT -6 w/ TEXT:
COUNT -5 w/ TEXT: i
COUNT -2 w/ TEXT: i
我并不真正明白这里的内存应该如何布局。如果不让 Clang 捕获这个错误,并且溢出恰好没有破坏其他重要的内存,解析器能正常解析一部分程序,而在另一些程序上则会在别处引发段错误。既然已经在这里贴了这么多文本,我想也应该把语法一并附上。它还比较粗糙,主要是因为我仍不确定这门语言最终会是什么样子,不过还是贴在这里:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "internal.h"
#include "elixr.h"
#include "compile.h"
#include "types.h"
/* Type of the semantic values the generated parser stores and hands to actions. */
#define YYSTYPE XR
/* peg option: parser state lives in a caller-supplied yycontext; the entry
 * points after the grammar pass &ctx to yyparsefrom(). */
#define YY_CTX_LOCAL
/* Extra yycontext fields: object, method and dump are written by grammar
 * actions; src is read by xr_parse_code_from_stdin(). */
#define YY_CTX_MEMBERS XR src; XR method; XR object; XR dump;
/* Line counter used in error messages; advanced by YY_INPUT. */
unsigned int lineNumber;
/* Input hook: read one character from stdin into buf.  result is 0 at EOF and
 * 1 otherwise; max is not used.  Both '\n' and '\r' bump lineNumber, so a
 * "\r\n" pair is counted as two lines. */
#define YY_INPUT(buf, result, max) \
{ \
int c= getc(stdin); \
if ('\n' == c || '\r' == c) ++lineNumber; \
result= (EOF == c) ? 0 : (*(buf)= c, 1); \
}
/* AST constructors: call ast_node() with the tag AST_<type> and one, two or
 * three operands; unused operand slots are filled with VAL_NIL. */
#define XR_AST1(type, a) ast_node(AST_##type, a, VAL_NIL, VAL_NIL)
#define XR_AST2(type, a, b) ast_node(AST_##type, a, b, VAL_NIL)
#define XR_AST3(type, a, b, c) ast_node(AST_##type, a, b, c)
%}
# Dump: one or more Object definitions collected into a list; the final action
# stores that list in ctx->dump.
Dump = -- o1:Object { $$ = o1 = list_new(o1); }
( -- o2:Object { $$ = o1 = list_append(0, o1, o2); }
)* { ctx->dump = o1;}
# Object: `object $name;` followed by EOL, then zero or more methods.  The name
# `root` selects the existing `root` value; any other name goes through
# object_new().  The first action sets ctx->object, which the Method actions
# that follow read.
Object = "object" - '$' i:ID ';' - EOL {ctx->object = (i == xr_sym("root")) ? root : object_new(0, i); }
(m:Method { object_add_method(ctx->object, m); })*
{ $$ = ctx->object; }
# Method: `method name(params) { ... }`.  The action creates the method with
# xr_method_new(), prints a "CODE" banner, calls qsend(s, "source", ...) on the
# body, compiles the body with xr_ast_compile(), links the method to
# ctx->object and records it in ctx->method.  It asserts that ctx->object is set.
Method = -- "method" - i:ID a:Params -- s:Code { struct XRMethod *m = xr_method_new(i, a); printf("CODE\n-----\n"); qsend(s, "source", val_num(0)); xr_ast_compile(s, m); m->object = ctx->object; $$ = ctx->method = (XR)m; assert(ctx->object); }
# Params: `()` yields list_empty(); otherwise a comma-separated list of IDs.
Params = '(' - ')' { $$ = list_empty(); }
| '(' - p1:ID { p1 = list_new(p1); }
(',' - p2:ID { p1 = list_append(0, p1, p2); }
)* ')' { $$ = p1; }
# Code: a brace-delimited statement list wrapped in a CODE node.
Code = SBLOCK -- s:Statements -- CBLOCK { $$ = XR_AST1(CODE, s);}
# Statements: one or more Stmt separated by SEP, with an optional trailing SEP.
# At least one statement is required, so an empty `{ }` body does not match.
Statements = (s1:Stmt { s1 = list_new(s1); }
(SEP s2:Stmt { s1 = list_append(0, s1, s2); })* SEP?
) { $$ = s1; }
# Stmt: a single statement.  Alternatives are tried in the order written and
# the first match wins:
#   - a bare expression is tried first; an assignment such as `i = ...` falls
#     through because the identifier alternative of Value requires !ASSIGN, and
#     keywords fall through because ID rejects anything starting with KEYWORD;
#   - `var x` (declaration) is tried before `var x = expr` (initialisation);
#   - `if ... else` is tried before plain `if`, so an `else` is consumed when
#     one is present.
Stmt = e:BitAndOr { $$ = XR_AST1(EXPRSTMT, e); }
| VAR i:ID !ASSIGN { $$ = XR_AST1(VDECL, i); }
| VAR i:ID ASSIGN e:BitAndOr{ $$ = XR_AST2(VINIT, i, e); }
| i:ID ASSIGN s:BitAndOr { $$ = XR_AST2(ASSIGN, i, s); }
| c:Code { $$ = c; }
| "if" - '(' - e:BitAndOr - ')' -- t:Stmt -- "else" -- f:Stmt { $$ = XR_AST3(IFELSE, e, t, f); }
| "if" - '(' - e:BitAndOr - ')' -- c:Stmt { $$ = XR_AST2(IF, e, c); }
| "while" - '(' - e:BitAndOr - ')' -- c:Stmt { $$ = XR_AST2(WHILE, e, c); }
| "debug" { $$ = XR_AST1(DEBUG, 0); }
| "print" - s:BitAndOr { $$ = XR_AST1(PRINT, s); }
# Disabled do/while alternative:
#| "do" -- c:Stmt "while" - '(' - e:Cmp - ')' { $$ = XR_AST2(DOWHILE, e, c); }
# BitAndOr: a chain of Cmp operands joined by AND (`&&`) or OR (`||`), folded
# left to right into AND/OR nodes.
BitAndOr = s:Cmp
( AND s2:Cmp { s = XR_AST2(AND, s, s2); }
| OR s2:Cmp { s = XR_AST2(OR, s, s2); }
)* { $$ = s; }
# Cmp: a chain of Msg operands joined by comparison operators, folded left to
# right.  GT and LT are listed before GTE and LTE; `>=` / `<=` are still
# reached because the shorter operator's alternative fails on the following
# `=` and the parser backtracks to the next alternative.
Cmp = s:Msg
( EQ s2:Msg { s = XR_AST2(EQ, s, s2); }
| NEQ s2:Msg { s = XR_AST2(NEQ, s, s2); }
| GT s2:Msg { s = XR_AST2(GT, s, s2); }
| LT s2:Msg { s = XR_AST2(LT, s, s2); }
| GTE s2:Msg { s = XR_AST2(GTE, s, s2); }
| LTE s2:Msg { s = XR_AST2(LTE, s, s2); }
)* { $$ = s; }
# Msg: a Sum followed by any number of message sends, folded left to right
# into SEND nodes (receiver, selector symbol, arguments or VAL_NIL).
# PEG choice is ordered and commits to the first alternative that matches, so
# the form with Arguments must be tried before the bare selector; with the
# bare `i:ID` first it always wins and the Arguments form is never reached.
Msg = s:Sum
( i:ID a:Arguments { s = XR_AST3(SEND, s, XR_AST1(SYMBOL, i), a); }
| i:ID { s = XR_AST3(SEND, s, XR_AST1(SYMBOL, i), VAL_NIL);}
)* { $$ = s; }
# Arguments: `()` yields VAL_NIL; otherwise a parenthesised ExprList.
Arguments = OPEN CLOSE - { $$ = VAL_NIL; }
| '(' - l:ExprList ')' - { $$ = l; }
# List: `[]` yields list_empty() (and prints a debug line); otherwise a
# bracketed ExprList, which may span several lines.
List = '[' -- ']' - { puts("EMPTY LSIT\n"); $$ = list_empty(); }
| '[' -- l:ExprList -- ']' - { $$ = l; }
# ExprList: one or more comma-separated Cmp expressions collected into a list.
# The result of list_append() is assigned back to h, as the Dump, Params and
# Statements rules do, so the list stays valid if list_append() returns a
# different list value.
ExprList = h:Cmp { h = list_new(h); } (',' -- t:Cmp { h = list_append(0, h, t);} )* { $$ = h; }
# Sum: Product operands joined by PLUS or MINUS, folded left to right.
Sum = l:Product
( PLUS r:Product { l = XR_AST2(PLUS, l, r); }
| MINUS r:Product { l = XR_AST2(MINUS, l, r); }
)* { $$ = l; }
# Product: BinaryNot operands joined by TIMES or DIVIDE, folded left to right.
Product = l:BinaryNot
( TIMES r:BinaryNot { l = XR_AST2(TIMES, l, r); }
| DIVIDE r:BinaryNot { l = XR_AST2(DIVIDE, l, r); }
)* { $$ = l; }
# BinaryNot: a Value, optionally prefixed by a single `!` (wrapped in a NOT node).
BinaryNot = v:Value { $$ = v;}
| NOT v:Value { $$ = XR_AST1(NOT, v); }
#FIXME: sort out values/symbols/ids
# Value: the primary expressions.  In order: number, string, `:symbol`, list
# literal, the literals false/true/nil/self, a variable reference (an ID not
# followed by ASSIGN), and a parenthesised expression.
Value = v:NUMBER { $$ = XR_AST1(NUMBER, v); }
| v:STRING { $$ = XR_AST1(STRING, v); }
| ':' v:ID { $$ = XR_AST1(SYMBOL, v); }
| v:List { $$ = XR_AST1(LIST, v); }
| "false" - { $$ = XR_AST1(VALUE, VAL_FALSE); }
| "true" - { $$ = XR_AST1(VALUE, VAL_TRUE); }
| "nil" - { $$ = XR_AST1(VALUE, VAL_NIL); }
| "self" - { $$ = XR_AST1(SELF, 0); }
| v:ID !ASSIGN { $$ = XR_AST1(VAR, v); }
| OPEN e:BitAndOr CLOSE { $$ = e; }
# ---- Lexical rules ----
# NUMBER has no action of its own; it matches either FIXNUM or DECIMAL.
NUMBER = FIXNUM | DECIMAL
# FIXNUM: optional '-', digits, not followed by '.'; converted with atoi().
FIXNUM = < '-'? [0-9]+ !'.' > - { $$ = val_num(atoi(yytext)); }
# DECIMAL: digits with an optional '.', converted with atof(); no sign is accepted.
DECIMAL = < [0-9]* '.'? [0-9]+ > - { $$ = xr_double(atof(yytext)); }
# ID: a lower-case letter followed by lower-case letters, digits or '_'.
# NOTE(review): KEYWORD has no end-of-word check, so !KEYWORD also rejects
# identifiers that merely start with a keyword (e.g. `iffy`, `printer`).
ID = !KEYWORD < [a-z] [a-z0-9_]* > - { $$ = xr_sym_n(yytext, yyleng); }
#FIXME: escapes
# CHAR: one string character: a backslash escape (named, 3-digit octal or
# 1-2 digit octal) or any character other than a backslash; never an EOL.
CHAR = !EOL ('\\' [abefnrtv'"\[\]\\]
| '\\' [0-3][0-7][0-7]
| '\\' [0-7][0-7]?
| !'\\' .
)
# STRING: double-quoted text; the captured text between the quotes is passed
# to xr_strn() as-is.
STRING = ["] < ( !["] CHAR )* > ["] - { $$ = xr_strn(yytext, yyleng); }
# SEP: statement separator: ';' or an end of line, plus any following blank space.
SEP = (';' | EOL) --
# TODO: sort out keywords
KEYWORD = "debug" | "nil" | "while" | "self" | "else" | "true" | "false" | "if" |"var" | "print"
# Operator and punctuation tokens; each consumes trailing same-line blanks.
# ASSIGN is a single '=' that is not the start of '=='.
ASSIGN = '=' !'=' -
PLUS = '+' -
MINUS = '-' -
TIMES = '*' -
DIVIDE = '/' -
OPEN = '(' -
CLOSE = ')' -
VAR = "var" -
SBLOCK = '{' -
CBLOCK = '}' -
NOT = '!' -
EQ = '==' -
NEQ = '!=' -
GT = '>' -
LT = '<' -
GTE = '>=' -
LTE = '<=' -
OR = '||' -
AND = '&&' -
# Whitespace: `-` skips blanks and comments on the current line; `--` also
# skips line ends.  A comment runs from '#' to the end of the line.
SPACE = ' ' | '\f' | '\v' | '\t'
COMMENT = '#' (!EOL .)*
EOL = '\n' | '\r\n' | '\r'
- = (SPACE | COMMENT)*
-- = (SPACE | COMMENT | EOL)*
#EOF = !.
%%
/*
 * Print a diagnostic for a failed parse to stderr, in the form
 *
 *   <stdin>:LINE: MESSAGE near token 'TEXT' before text "REST OF LINE"
 *
 * message: text describing the error.
 * yyctx:   parser context; its text, buf, pos and limit fields are read.
 *
 * Side effects: advances yyctx->pos over the buffered characters it echoes,
 * writes a terminating '\0' at yyctx->buf[yyctx->limit], and may consume the
 * remainder of the current line from stdin.
 */
void yyerror(char *message, yycontext *yyctx)
{
    char *fileName = "<stdin>";
    FILE *input = stdin;

    /* lineNumber is an unsigned int, so it is printed with %u (was %d). */
    fprintf(stderr, "%s:%u: %s", fileName, lineNumber, message);

    if (yyctx->text[0])
        fprintf(stderr, " near token '%s'", yyctx->text);

    if (yyctx->pos < yyctx->limit || !feof(input)) {
        /* assumes buf has room for one more byte at index limit -- TODO confirm */
        yyctx->buf[yyctx->limit] = '\0';
        fprintf(stderr, " before text \"");

        /* Echo what is still buffered, stopping at the end of the line. */
        while (yyctx->pos < yyctx->limit) {
            char ch = yyctx->buf[yyctx->pos];

            if ('\n' == ch || '\r' == ch)
                break;
            fputc(ch, stderr);
            yyctx->pos++;
        }

        /* Buffer exhausted without reaching a line end: keep echoing
         * straight from the input until the line (or the input) ends. */
        if (yyctx->pos == yyctx->limit) {
            int c;

            while (EOF != (c = fgetc(input)) && '\n' != c && '\r' != c)
                fputc(c, stderr);
        }
        fputc('\"', stderr);
    }
    fprintf(stderr, "\n");
}
/*
 * Parse stdin starting at the Code rule, repeating until yyparsefrom()
 * reports failure, and return the context's src field.
 *
 * NOTE(review): no grammar action visible in this file assigns ctx->src, so
 * this appears to return the zero-initialised value -- confirm.
 */
XR xr_parse_code_from_stdin()
{
    yycontext ctx;

    memset(&ctx, 0, sizeof ctx);
    for (;;) {
        if (!yyparsefrom(&ctx, yy_Code))
            break;
    }
    return ctx.src;
}
/*
 * Parse stdin starting at the Method rule, repeating until yyparsefrom()
 * reports failure, and return the context's method field (set by the Method
 * action).
 *
 * NOTE(review): the Method action asserts ctx->object, which only the Object
 * rule sets; starting here with a zeroed context looks like it would trip
 * that assert -- confirm.
 */
XR xr_parse_method_from_stdin()
{
    yycontext ctx;

    memset(&ctx, 0, sizeof ctx);
    for (;;) {
        if (!yyparsefrom(&ctx, yy_Method))
            break;
    }
    return ctx.method;
}
/*
 * Parse stdin starting at the Dump rule, repeating until yyparsefrom()
 * reports failure, and return the context's dump field (set by the Dump
 * action).  Resets lineNumber to 1 before parsing.  No error is reported
 * when parsing stops; the yyerror() call is currently disabled.
 */
XR xr_parse_dump_from_stdin()
{
    yycontext ctx;

    lineNumber = 1;
    memset(&ctx, 0, sizeof ctx);
    for (;;) {
        if (!yyparsefrom(&ctx, yy_Dump))
            break;
    }
    /* yyerror("syntax error", &ctx); */
    return ctx.dump;
}
答案 0 :(得分:1)
问题似乎在于我的语法导致了过深的递归,而 yyctx->valslen= 32; 不足以容纳它。leg 对此不做任何边界检查,而我原先只是想当然地认为不会有问题。我原以为 vals 这块内存只保存语法规则中用到的变量,可是每条规则里只有大约 1~5 个变量。This guy 在 Windows 上构建了 peg/leg,并通过把 vals 数组的大小放进一个 #define、以及在 yyPush 中加入检查边界的断言修复了这个错误。谢谢老兄!