我使用re2c 0.10.5并且我无法正确匹配字符串的开头。
例如,我定义了:
[aA] w "=" { return ATTRKEYWORD;}
[bB] w "=" { return BWKEYWORD;}
其中w定义为:
w = s*;
但如果我的输入字符串是:
b=my string a=foobar
我不应该得到ATTRKEYWORD,因为该字符串以b=开头;我希望得到的是BWKEYWORD。
根据 http://re2c.org/manual.html,在re2c中 ^ 并不表示行首(放在方括号内时它表示对字符类取反)。
这意味着
[^aA] w "=" { return ATTRKEYWORD;}
[^bB] w "=" { return BWKEYWORD;}
仍然无法解决我的问题。
有什么解决办法吗?
答案 0 :(得分:2)
这会找到属性关键字。
#include <stdio.h>
#include <string.h>
/*
 * Scanner state shared between the caller and scanner().
 * The pointer fields are wired to re2c's YYCURSOR / YYLIMIT / YYMARKER
 * through the re2c:define directives inside scanner().
 */
typedef struct lexerObj lexerObj;

struct lexerObj
{
    char *mCursor; /* current read position (used as YYCURSOR) */
    char *mLimit;  /* end-of-input bound (used as YYLIMIT) */
    char *mMarker; /* backtracking position (used as YYMARKER) */
    char *mToken;  /* start of the token being scanned; set by scanner() */
};
/*
 * Token codes returned by scanner(). Zero is deliberately unused here:
 * scanner() returns 0 to signal that the input is exhausted.
 */
enum
{
    ATTRKEYWORD = 1,  /* [aA] w "=" */
    BWKEYWORD = 2,    /* [bB] w "=" */
    OTHERKEYWORD = 3  /* any other single character matched by `.` */
};
/*
 * Scan one token from aLexer and return its code.
 *
 * This function must be preprocessed by re2c; the two re2c blocks below are
 * replaced by generated matching code.
 *
 * On entry the token start (mToken) is set to the current cursor; on a
 * successful match the cursor has been advanced past the token, so the
 * token text is [mToken, mCursor).
 *
 * Returns ATTRKEYWORD for [aA] w "=", BWKEYWORD for [bB] w "=",
 * OTHERKEYWORD for a single character matched by `.`, or 0 when a refill
 * is requested and the cursor has already reached mLimit.
 *
 * NOTE(review): in re2c `.` does not match a newline, so an input
 * containing '\n' has no matching rule here -- confirm whether such input
 * can occur.
 */
int scanner(lexerObj *aLexer)
{
/*
 * No real refilling is done: the whole input is already in memory, so a
 * refill request at or past the limit simply ends scanning.
 * The macro has no trailing semicolon; the expansion site provides it, which
 * keeps the do/while(0) wrapper safe inside if/else.
 */
#define YYFILL(n) \
    do { \
        if (aLexer->mCursor >= aLexer->mLimit) { \
            return 0; \
        } \
    } while (0)
/*!re2c
re2c:define:YYCTYPE = "char";
re2c:define:YYCURSOR = aLexer->mCursor;
re2c:define:YYLIMIT = aLexer->mLimit;
re2c:define:YYMARKER = aLexer->mMarker;
re2c:yyfill:enable = 1;
s = [ ];
w = s*;
*/
    /* Remember where this token starts so the caller can extract its text. */
    aLexer->mToken = aLexer->mCursor;
/*!re2c
[aA] w "=" { return ATTRKEYWORD;}
[bB] w "=" { return BWKEYWORD;}
. { return OTHERKEYWORD; }
*/
}
int main()
{
lexerObj aObj;
int a;
char sToken[512];
char *sBuffer = "b=my string a=foobar";
aObj.mCursor = sBuffer;
aObj.mLimit = aObj.mCursor + strlen(sBuffer);
while ( (a = scanner(&aObj)) != 0)
{
int len;
len = aObj.mCursor - aObj.mToken;
memset(sToken, 0, sizeof(sToken));
strncpy(sToken, aObj.mToken, len);
printf("Token = %d(%d) [%s]\n", a, len, sToken);
}
return 0;
}
这是输出。
Token = 2(2) [b=]
Token = 3(1) [m]
Token = 3(1) [y]
Token = 3(1) [ ]
Token = 3(1) [s]
Token = 3(1) [t]
Token = 3(1) [r]
Token = 3(1) [i]
Token = 3(1) [n]
Token = 3(1) [g]
Token = 3(1) [ ]
Token = 1(2) [a=]
Token = 3(1) [f]
Token = 3(1) [o]
Token = 3(1) [o]
Token = 3(1) [b]
Token = 3(1) [a]
Token = 3(1) [r]