flex如何识别子串?

时间:2017-10-23 09:33:47

标签: c linux compiler-construction flex-lexer lex

我是初学者,正在学习 C 语言的词法分析。我想输出所有关键字、标识符、字面量、运算符和分隔符。这是我的程序 lexer.l,但它不能正常工作。

%{
/* lexer.l: flex scanner that prints each token it recognizes in a C source
   file as "<line>\t<lexeme,category>". */
#include<stdio.h>
int currentLine=1;   /* number of the line being scanned; incremented by the \n rule */
%}
%%
    /* Preprocessor lines: the printed lexeme is the fixed word "include" or
       "define", not the matched text. */
#include<.*> printf("%d\t<%s,%s>\n",currentLine,"include","PreProcessor");
#define[^\n]+  printf("%d\t<%s,%s>\n",currentLine,"define","PreProcessor");
= {printf("%d\t<%s,%s>\n",currentLine,yytext,"AssignmentOperator");}
int|short|signed|unsigned|long|double|float|char|void|enum|union|struct|auto|const|register|static|volatile|extern|typedef|if|else|while|do|for|switch|case|continue|break|default|sizeof|goto|return   {printf("%d\t<%s,%s>\n",currentLine,yytext,"Keyword");}
    /* Blanks and tabs are discarded; a newline only advances the line counter. */
[\t ]   ;
\n currentLine++;
    /* A string literal is everything between two double quotes; the body may
       span lines because only the quote character is excluded. */
(\"[^\"]*\")    {printf("%d\t<%s,%s>\n",currentLine,yytext,"String Literal");}
\( printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisOpen");
\) printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisClose");
\{ printf("%d\t<%s,%s>\n",currentLine,yytext,"blockOpen");
\} printf("%d\t<%s,%s>\n",currentLine,yytext,"blockClose");
    /* "<=" is reported as ArithmeticOperator here; there is no rule for a
       lone '<' or '>' or for ">=". */
"+"|"-"|"/"|"*"|"<="    {printf("%d\t<%s,%s>\n",currentLine,yytext,"ArithmeticOperator");}
(\&\&)|(\|\|)|! printf("%d\t<%s,%s>\n",currentLine,yytext,"LogicalOperator");
&|\||~ printf("%d\t<%s,%s>\n",currentLine,yytext,"BitwiseOperator");
    /* Matches the two slashes plus exactly ONE following character: the
       bracket expression has no repetition operator. */
\/\/[^\n] printf("%d\t<%s,%s>\n",currentLine,yytext,"SingleLineComment");
    /* '.' never matches a newline, so only block comments that open and close
       on the same line are recognized. */
(\/\*.*\*\/)    printf("%d\t<%s,%s>\n",currentLine,yytext,"MultiLineComment");
;   printf("%d\t<%s,%s>\n",currentLine,yytext,"Separator");
    /* NOTE(review): this catch-all consumes the whole rest of the line. Flex
       picks the rule with the longest match, so this rule beats every token
       rule above unless the token is the last thing on its line. It also
       prints no line number. */
.* printf("%s\tany match\n",yytext);
%%

/* End-of-input hook called by the generated scanner. A non-zero result says
   there is no further input to switch to, so scanning stops. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}

/*
 * Entry point: scans the file named by the single command-line argument.
 *
 * Returns 1 when the argument count is wrong, 2 when the file cannot be
 * opened, and 0 after the scan has run.
 */
int main(int argc, char *argv[])
{
    const char *path;

    if (argc != 2) {
        fputs("Invalid arguments !\n Usage: lexgen <filename>\n", stdout);
        return 1;
    }

    path = argv[1];
    yyin = fopen(path, "r");
    if (!yyin) {
        fputs("File not found !\n", stdout);
        return 2;
    }

    /* Header lines, then one output line per rule match printed by yylex(). */
    fputs("Lexical Analyser for C :-\n", stdout);
    fputs("Line\tToken\n", stdout);
    yylex();

    fclose(yyin);
    return 0;
}

输入文件:

#include<stdio.h>
#define PI 3.14
int a=5;
double
< + - *
<= >= ! ~
"hskldjh";

下面是另一个程序 tmp.l,它可以正常工作(它能处理 int a=5;,而 lexer.l 只是将其忽略):

%{
/* tmp.l: flex scanner that prints "<line> <lexeme> <category>" for each token
   and records errors and identifiers in the tables below. */
#include<stdio.h>
#include<string.h>
/* err: recorded error messages; name: table of distinct identifiers. */
char err[20][50],name[20][20];
/* lno: current line; cnt: entries used in name; ecnt: entries used in err and
   elno; elno: line number of each recorded error.
   NOTE(review): the rule actions below never check ecnt against 20. */
int lno=1,cnt=0,ecnt=0,elno[20];
void st_add(char *);
%}

%%
[0-9]+   {printf("%d %s Number\n",lno,yytext);}
    /* Inside the brackets, '+' '-' '/' is read as the range '+' through '/',
       so ',' and '.' are reported as Operator as well. */
[+-/*]   {printf("%d %s Operator\n",lno,yytext);}
=   {printf("%d %s Assignment\n",lno,yytext);}
    /* The word lists come before the identifier rule: when two rules match
       the same length, flex uses the one listed first. */
main|return|include|if|else|switch|cin|cout|using|namespace|std {printf("%d %s Keyword\n",lno,yytext);}
int|double|char|float {printf("%d %s Data type\n",lno,yytext);}
[\t ]      ;
\n   {lno++;}
    /* Comments are discarded. The block-comment body may contain neither '*'
       nor '/'; an opener with no matching close is recorded as an error. */
(\/\/.*) ;
(\/\*[^*/]*\*\/) ;
(\/\*[^*/]*)  {elno[ecnt]=lno;char str[100]="Unterminated comment";strcpy(err[ecnt],str);ecnt++;}
printf|scanf  {printf("%d %s Library function\n",lno,yytext);}
    /* Identifiers must start with a lowercase letter; underscores are not
       accepted. Each one is also passed to st_add(). */
[a-z]+[a-zA-Z0-9]* {printf("%d %s Identifier\n",lno,yytext);st_add(yytext);}
([a-zA-Z0-9]+\.h) {printf("%d %s Header\n",lno,yytext);}
\(   {printf("%d %s Open bracket\n",lno,yytext);}
\)   {printf("%d %s Close bracket\n",lno,yytext);}
    /* These two match the two-character sequences "<<" and ">>". */
\<<   {printf("%d %s insertion\n",lno,yytext);}
\>>   {printf("%d %s extraction\n",lno,yytext);}
\{   {printf("%d %s Block start\n",lno,yytext);}
\}   {printf("%d %s Block end\n",lno,yytext);}
#   {printf("%d %s Preprocessor\n",lno,yytext);}
;   {printf("%d %s Terminator\n",lno,yytext);}
(\"[^\"]*\")  {printf("%d %s String literal\n",lno,yytext);} 
    /* An opening quote that reaches a newline before any closing quote is
       recorded as an error; the consumed newline is counted here. */
(\"[^\"\n]*\n)  {elno[ecnt]=lno;char str[100]="Unterminated quote";strcpy(err[ecnt],str);ecnt++;lno++;}

    /* Digits followed by letters are recorded as an error. NOTE(review): the
       range A-z also covers the punctuation between 'Z' and 'a' in ASCII.
       There is no catch-all rule, so any character that no rule matches is
       copied to the output unchanged by flex's default rule. */
[0-9]+[a-zA-z]*  {elno[ecnt]=lno;char str[100]="Unrecognized token";strcpy(err[ecnt],str);ecnt++;}  
%%
/*
 * Adds identifier s to the symbol table `name` unless it is already present.
 *
 * Identifiers longer than one table slot are truncated to fit, and the
 * duplicate check is done on the truncated form so the same long identifier
 * is not stored twice. When the table is full the identifier is dropped
 * instead of writing past the end of the array.
 */
void st_add(char s[20])
{
    char key[sizeof name[0]];
    int i;

    /* snprintf always NUL-terminates and never writes more than sizeof key. */
    snprintf(key, sizeof key, "%s", s);

    for (i = 0; i < cnt; i++) {
        if (strcmp(name[i], key) == 0) {
            return;
        }
    }

    if ((size_t)cnt >= sizeof name / sizeof name[0]) {
        return; /* table full */
    }

    /* key is at most sizeof name[0] bytes including the NUL, so this fits. */
    strcpy(name[cnt], key);
    cnt++;
}

/*
 * Entry point: asks for a file name on standard input, scans that file, then
 * prints the recorded errors followed by the symbol table.
 *
 * Returns 0 after a completed scan, 1 if no file name could be read or the
 * file could not be opened.
 */
int main(void)
{
    char file[20];
    int i;

    printf("Enter file name:");
    /* %19s leaves room for the terminating NUL in file[20]. */
    if (scanf("%19s", file) != 1) {
        fprintf(stderr, "No file name given\n");
        return 1;
    }

    /* Per the flex manual, a scanner whose yyin is NULL reads stdin instead,
       so a failed open must be reported here rather than passed on. */
    yyin = fopen(file, "r");
    if (yyin == NULL) {
        perror(file);
        return 1;
    }

    printf("Line No. Lexeme  Token\n");
    yylex();
    fclose(yyin);

    printf("Number of errors: %d\n", ecnt);
    for (i = 0; i < ecnt; i++) {
        printf("Line no.: %2d  %s\n", elno[i], err[i]);
    }

    printf("\nSymbol Table\n");
    for (i = 0; i < cnt; i++) {
        printf("%s\n", name[i]);
    }
    return 0;
}

/* End-of-input hook called by the generated scanner; returning non-zero
   reports that there is nothing more to scan. */
int yywrap()
{
    enum { INPUT_EXHAUSTED = 1 };

    return INPUT_EXHAUSTED;
}

tmp.l输出:

Line No. Lexeme  Token
1 # Preprocessor
1 include Keyword
<1 stdio.h Header
>2 # Preprocessor
2 define Identifier
PI2 3 Number
2 . Operator
2 14 Number
3 int Data type
3 a Identifier
3 = Assignment
3 5 Number
3 ; Terminator
4 double Data type
<5 + Operator
5 - Operator
5 * Operator
<6 = Assignment
>6 = Assignment
!~7 "hskldjh" String literal
7 ; Terminator
Number of errors: 0

Symbol Table
define
a

lexer.l输出:

Lexical Analyser for C :-
Line    Token
1   <include,PreProcessor>
2   <define,PreProcessor>
int a=5;    any match
4   <double,Keyword>
< + - * any match
<= >= ! ~   any match
"hskldjh";  any match

它甚至无法匹配关系运算符和算术运算符的正则表达式。如果输入只有 int,它会显示 int 是关键字;但如果输入是 int a=5;,lexer.l 就会把整行忽略(只输出 any match),而 tmp.l 却能完美处理!在 flex 中到底应该如何编写规则?

1 个答案:

答案 0 :(得分:1)

(F)lex 总是选用匹配最长的规则。规则 .* 会一直匹配到行尾,因此它的匹配结果比其他任何规则的都长,除非该记号恰好位于行尾。(匹配长度相同时,flex 选用在文件中排在前面的规则,所以位于行尾的记号仍能被前面的规则识别。)