编写Lex规则以检测引号符号

时间:2017-02-01 16:31:44

标签: c lex

我想问你如何编写Lex代码来检测引号符号(")这是我的Lex代码:

%e  1019
%p  2807
%n  371
%k  284
%a  1213
%o  1117

Q   \"         /* Definition of Quote*/
O   [0-7]
D   [0-9]
NZ  [1-9]
L   [a-zA-Z_]
A   [a-zA-Z_0-9]
H   [a-fA-F0-9]
HP  (0[xX])
E   ([Ee][+-]?{D}+)
P   ([Pp][+-]?{D}+)
FS  (f|F|l|L)
IS  (((u|U)(l|L|ll|LL)?)|((l|L|ll|LL)(u|U)?))
CP  (u|U|L)
SP  (u8|u|U|L)
ES  (\\(['"\?\\abfnrtv]|[0-7]{1,3}|x[a-fA-F0-9]+))
WS  [ \t\v\n\f]

%{
    #include <stdio.h>
    #include "y.tab.h"

    extern void yyerror(const char *);  

    extern int sym_type(const char *);  

    #define sym_type(identifier) IDENTIFIER 

    static void comment(void);
    static int check_type(void);
%}

%%
Q                       {yylval.a=strdup(yytext); return '"';} /*Rule to detect quotes*/
"/*"                                    { comment(); }
"//".*                                    { }

"include"               { yylval.a=strdup(yytext); return(INCLUDE); } /*Line 44*/
"void"                  { yylval.a=strdup(yytext); return(VOID); }
"int"                   { yylval.a=strdup(yytext); return(INT); } 
"long"                  { yylval.a=strdup(yytext); return(LONG); }
"char"                  { yylval.a=strdup(yytext); return(CHAR); }
"short"                 { yylval.a=strdup(yytext); return(SHORT); }
"signed"                { yylval.a=strdup(yytext); return(SIGNED); }
"double"                { yylval.a=strdup(yytext); return(DOUBLE); }
"unsigned"              { yylval.a=strdup(yytext); return(UNSIGNED); }
"float"                 { yylval.a=strdup(yytext); return(FLOAT); }


"auto"                  { yylval.a=strdup(yytext); return(AUTO); } 
"extern"                { yylval.a=strdup(yytext); return(EXTERN); }
"register"              { yylval.a=strdup(yytext); return(REGISTER); }
"static"                { yylval.a=strdup(yytext); return(STATIC); }


"const"                 { yylval.a=strdup(yytext); return(CONST); } 
"restrict"              { yylval.a=strdup(yytext); return(RESTRICT); }
"volatile"              { yylval.a=strdup(yytext); return(VOLATILE); }
"_Atomic"               { yylval.a=strdup(yytext); return ATOMIC; }


"break"                 { yylval.a=strdup(yytext); return(BREAK); }
"case"                  { yylval.a=strdup(yytext); return(CASE); }

"continue"              { yylval.a=strdup(yytext); return(CONTINUE); }
"default"               { yylval.a=strdup(yytext); return(DEFAULT); }
"do"                    { yylval.a=strdup(yytext); return(DO); }
"else"                  { yylval.a=strdup(yytext); return(ELSE); }
"enum"                  { yylval.a=strdup(yytext); return(ENUM); }

"for"                   { yylval.a=strdup(yytext); return(FOR); }
"goto"                  { yylval.a=strdup(yytext); return(GOTO); }
"if"                    { yylval.a=strdup(yytext); return(IF); }
"inline"                { yylval.a=strdup(yytext); return(INLINE); }


"return"                { yylval.a=strdup(yytext); return(RETURN); }
"sizeof"                { yylval.a=strdup(yytext); return(SIZEOF); }

"struct"                { yylval.a=strdup(yytext); return(STRUCT); }
"switch"                { yylval.a=strdup(yytext); return(SWITCH); }
"typedef"               { yylval.a=strdup(yytext); return(TYPEDEF); }
"union"                 { yylval.a=strdup(yytext); return(UNION); }



"while"                 { yylval.a=strdup(yytext); return(WHILE); }
"_Alignas"              { yylval.a=strdup(yytext); return ALIGNAS; }
"_Alignof"              { yylval.a=strdup(yytext); return ALIGNOF; }

"_Bool"                 { yylval.a=strdup(yytext); return BOOL; }
"_Complex"              { yylval.a=strdup(yytext); return COMPLEX; }
"_Generic"              { yylval.a=strdup(yytext); return GENERIC; }
"_Imaginary"            { yylval.a=strdup(yytext); return IMAGINARY; }
"_Noreturn"             { yylval.a=strdup(yytext); return NORETURN; }
"_Static_assert"        { yylval.a=strdup(yytext); return STATIC_ASSERT; }
"_Thread_local"         { yylval.a=strdup(yytext); return THREAD_LOCAL; }
"__func__"              { yylval.a=strdup(yytext); return FUNC_NAME; }


{L}{A}*                 {  return check_type(); } /*Line 107*/

{HP}{H}+{IS}?               { yylval.a=strdup(yytext); return I_CONSTANT; }
{NZ}{D}*{IS}?               { yylval.a=strdup(yytext); return I_CONSTANT; }
"0"{O}*{IS}?                { yylval.a=strdup(yytext); return I_CONSTANT; }
{CP}?"'"([^'\\\n]|{ES})+"'"     { return I_CONSTANT; } /**/

{D}+{E}{FS}?                { yylval.a=strdup(yytext); return F_CONSTANT; }
{D}*"."{D}+{E}?{FS}?        { yylval.a=strdup(yytext); return F_CONSTANT; }
{D}+"."{E}?{FS}?            { yylval.a=strdup(yytext); return F_CONSTANT; }
{HP}{H}+{P}{FS}?            { yylval.a=strdup(yytext);return F_CONSTANT; }
{HP}{H}*"."{H}+{P}{FS}?     { yylval.a=strdup(yytext); return F_CONSTANT; }
{HP}{H}+"."{P}{FS}?         { yylval.a=strdup(yytext); return F_CONSTANT; }

({SP}?\"([^"\\\n]|{ES})*\"{WS}*)+   { return STRING_LITERAL; }


"..."                   { yylval.a=strdup(yytext); return ELLIPSIS; }
">>="                   { yylval.a=strdup(yytext); return RIGHT_ASSIGN; }
"<<="                   { yylval.a=strdup(yytext); return LEFT_ASSIGN; }
"+="                    { yylval.a=strdup(yytext); return ADD_ASSIGN; }
"-="                    { yylval.a=strdup(yytext); return SUB_ASSIGN; }
"*="                    { yylval.a=strdup(yytext); return MUL_ASSIGN; }
"/="                    { yylval.a=strdup(yytext); return DIV_ASSIGN; }
"%="                    { yylval.a=strdup(yytext); return MOD_ASSIGN; }
"&="                    { yylval.a=strdup(yytext); return AND_ASSIGN; }
"^="                    { yylval.a=strdup(yytext); return XOR_ASSIGN; }
"|="                    { yylval.a=strdup(yytext); return OR_ASSIGN; }
">>"                    { yylval.a=strdup(yytext); return RIGHT_OP; }
"<<"                    { yylval.a=strdup(yytext); return LEFT_OP; }
"++"                    { yylval.a=strdup(yytext); return INC_OP; }
"--"                    { yylval.a=strdup(yytext); return DEC_OP; }
"->"                    { yylval.a=strdup(yytext); return PTR_OP; }
"&&"                    { yylval.a=strdup(yytext); return AND_OP; }
"||"                    { yylval.a=strdup(yytext); return OR_OP; }
"<="                    { yylval.a=strdup(yytext); return LE_OP; }
">="                    { yylval.a=strdup(yytext); return GE_OP; }
"=="                    { yylval.a=strdup(yytext); return EQ_OP; }
"!="                    { yylval.a=strdup(yytext); return NE_OP; }
";"                     { yylval.a=strdup(yytext); return ';'; }
("{"|"<%")              { yylval.a=strdup(yytext); return '{'; }
("}"|"%>")              { yylval.a=strdup(yytext); return '}'; }
","                     { yylval.a=strdup(yytext); return ','; }
":"                     { yylval.a=strdup(yytext); return ':'; }
"="                     { yylval.a=strdup(yytext); return '='; }
"("                     { yylval.a=strdup(yytext); return '('; }
")"                     { yylval.a=strdup(yytext); return ')'; }
("["|"<:")              { yylval.a=strdup(yytext); return '['; }
("]"|":>")              { yylval.a=strdup(yytext); return ']'; }
"."                     { yylval.a=strdup(yytext); return '.'; } /*Line 156*/
"&"                     { yylval.a=strdup(yytext); return '&'; }
"!"                     { yylval.a=strdup(yytext); return '!'; }
"~"                     { yylval.a=strdup(yytext); return '~'; }
"-"                     { yylval.a=strdup(yytext); return '-'; }
"+"                     { yylval.a=strdup(yytext); return '+'; }
"*"                     { yylval.a=strdup(yytext); return '*'; }
"/"                     { yylval.a=strdup(yytext); return '/'; }
"%"                     { yylval.a=strdup(yytext); return '%'; }
"<"                     { yylval.a=strdup(yytext); return '<'; }
">"                     { yylval.a=strdup(yytext); return '>'; }
"^"                     { yylval.a=strdup(yytext); return '^'; }
"|"                     { yylval.a=strdup(yytext); return '|'; }
"?"                     { yylval.a=strdup(yytext); return '?'; }
"#"                     {yylval.a=strdup(yytext); return '#'; }   /*Line 170*/




{WS}+                   { /* whitespace separates tokens */ }
.                   { /* discard bad characters */ }  /*Line 176*/

%%

int yywrap(void)        /* called at end of input */
{
    return 1;           /* terminate now */
}

static void comment(void)
{
    int c;

    while ((c = input()) != 0)
    if (c == '*')
    {
        printf("1: %d\n",c);
        while ((c = input()) == '*')
        ;
        printf("2: %d\n",c);
        if (c == '/')
        return;
        if (c == 0)
        printf("3: %d\n",c);
        break;
    }
    yyerror("unterminated comment");
}

static int check_type(void)
{
    switch (sym_type(yytext))
    {
        case TYPEDEF_NAME:                /* previously defined */
            {
            return TYPEDEF_NAME;
            }
        case ENUMERATION_CONSTANT:        /* previously defined */
        return ENUMERATION_CONSTANT;
        default:                          /* includes undefined */ /*This is when do normal declaration such as int a=3; in this case it return a*/
            {
            yylval.a=strdup(yytext);
            return IDENTIFIER;
            }
    }
}

这是Yacc代码的一些相关部分:

include
: '#' INCLUDE '<' IDENTIFIER '.' IDENTIFIER '>' {printf("The first rule\n");}
| '#' INCLUDE '"' IDENTIFIER '.' IDENTIFIER '"' {printf("The second rule\n");}
;

问题(我认为)在定义部分:Q \"当我编译Lex并与Yacc代码一起运行时。假设我输入一个句子:#include"header.h"输出为:

--accepting rule at line 170 ("#")
--accepting rule at line 44 ("include")
--accepting rule at line 176 ("?")
--accepting rule at line 176 ("?")
--accepting rule at line 176 ("?")
--accepting rule at line 107 ("header")

请注意,我在Lex代码上写了一行代码,对应上面的错误。

正如您所看到的, #include 被标记化而没有任何问题,但标头。 h 不行,结果为?。特别是,逗号。应符合规则&#34;。&#34;在第156行,但它最终成为规则176. header h 应该匹配在第107行的规则{L} {A} *中。我该怎么做解决这个问题?

还有一个问题:当我在第170行之后放Q {yylval.a=strdup(yytext); return '"';}时,我收到了错误warning, rule cannot be matched我想问为什么会这样?

  

修改

我尝试使用{Q},也重新安排了规则,即将规则与关键字放在一起(例如&#34; include&#34;,&#34; void&#34;,& #34;如果&#34;)在第一优先级。但它没有用。有谁知道如何解决这个问题?

0 个答案:

没有答案