使用re2c匹配行的开头

时间:2014-07-08 14:51:19

标签: lexer re2c

我使用的是re2c 0.10.5,但无法正确匹配字符串的开头。

例如,我定义了:

[aA] w "="      { return ATTRKEYWORD;}
[bB] w "="      { return BWKEYWORD;}

其中w定义为:

w               =       s*;

但如果我有这样的字符串:

b=my string a=foobar

我不应该得到ATTRKEYWORD;因为该字符串以"b="开头,我希望得到的是BWKEYWORD。

根据 http://re2c.org/manual.html,在re2c中 ^ 并不表示行的开头(在字符类 [^...] 中它表示取反)。

这意味着

[^aA] w "="      { return ATTRKEYWORD;}
[^bB] w "="      { return BWKEYWORD;}

仍然无法解决我的问题。

有什么解决办法吗?

1 个答案:

答案 0 :(得分:2)

这会找到属性关键字。

#include <stdio.h>
#include <string.h>

/*
 * Scanner state shared between the caller and scanner().
 * All four members point into the same input buffer.
 */
struct lexerObj
{
    char *mCursor;  /* bound to re2c's YYCURSOR in scanner() */
    char *mLimit;   /* bound to re2c's YYLIMIT in scanner() */
    char *mMarker;  /* bound to re2c's YYMARKER in scanner() */
    char *mToken;   /* set to mCursor at the start of each scanner() call */
};

typedef struct lexerObj lexerObj;

 /*
  * Token codes returned by scanner(). The value 0 is not listed here
  * because scanner() returns it when the input is exhausted.
  */
 enum
 {
     ATTRKEYWORD  = 1,  /* [aA], optional spaces, then '=' */
     BWKEYWORD    = 2,  /* [bB], optional spaces, then '=' */
     OTHERKEYWORD = 3   /* any other single character matched by '.' */
 };

/*
 * Scan one token from aLexer and return its code.
 *
 * mToken is set to the cursor position on entry, so after a call that
 * returns a non-zero code the token text is the range [mToken, mCursor).
 *
 * Returns:
 *   ATTRKEYWORD   for [aA], optional spaces, then '='
 *   BWKEYWORD     for [bB], optional spaces, then '='
 *   OTHERKEYWORD  for any other single character matched by '.'
 *   0             when YYFILL finds the cursor at or past mLimit
 *
 * This source must be processed by re2c before compilation; the re2c
 * directive blocks below are replaced by the generated matcher.
 *
 * NOTE(review): per the re2c manual '.' does not match a newline, so an
 * input containing '\n' has no matching rule here. Confirm that inputs
 * are single-line, or add a rule for it.
 */
int scanner(lexerObj *aLexer)
{
/*
 * No buffer refill is attempted: scanning simply stops once the cursor
 * reaches the limit. The macro deliberately has no trailing ';' after
 * "while (0)", so that "YYFILL(n);" expands to exactly one statement and
 * stays safe inside an unbraced if/else.
 */
#define YYFILL(n)                                \
    do {                                         \
        if (aLexer->mCursor >= aLexer->mLimit) { \
            return 0;                            \
        }                                        \
    } while (0)

    /*!re2c
      re2c:define:YYCTYPE     = "char";
      re2c:define:YYCURSOR    = aLexer->mCursor;
      re2c:define:YYLIMIT     = aLexer->mLimit;
      re2c:define:YYMARKER    = aLexer->mMarker;
      re2c:yyfill:enable      = 1;

      s = [ ];
      w = s*;
    */

    /* Remember where this token starts so the caller can extract its text. */
    aLexer->mToken = aLexer->mCursor;

    /*!re2c
      [aA] w "=" { return ATTRKEYWORD;}
      [bB] w "=" { return BWKEYWORD;}
      .          { return OTHERKEYWORD;  }
    */

/* YYFILL refers to the local aLexer; do not let it leak past this function. */
#undef YYFILL
}


int main()
{
    lexerObj aObj;
    int a;
    char sToken[512];
    char *sBuffer = "b=my string a=foobar";

    aObj.mCursor = sBuffer;
    aObj.mLimit  = aObj.mCursor + strlen(sBuffer);

    while ( (a = scanner(&aObj)) != 0)
    {
        int len;

        len = aObj.mCursor - aObj.mToken;

        memset(sToken, 0, sizeof(sToken));
        strncpy(sToken, aObj.mToken, len);
        printf("Token = %d(%d) [%s]\n", a, len, sToken);
    }

    return 0;
}

这是输出。

Token = 2(2) [b=]
Token = 3(1) [m]
Token = 3(1) [y]
Token = 3(1) [ ]
Token = 3(1) [s]
Token = 3(1) [t]
Token = 3(1) [r]
Token = 3(1) [i]
Token = 3(1) [n]
Token = 3(1) [g]
Token = 3(1) [ ]
Token = 1(2) [a=]
Token = 3(1) [f]
Token = 3(1) [o]
Token = 3(1) [o]
Token = 3(1) [b]
Token = 3(1) [a]
Token = 3(1) [r]