我是初学者,正在为 C 语言编写词法分析器。我想输出所有关键字、标识符、字面量、运算符和分隔符。这是我的程序 lexer.l,但它不能正常工作。
%{
/* C prologue: everything between the %{ and %} markers is copied verbatim into the generated scanner. */
#include<stdio.h>
/* Line number printed with each token; starts at 1 and is incremented by the newline rule. */
int currentLine=1;
%}
%%
 /* Rules section of lexer.l: each rule prints "<line>\t<lexeme,category>" for the text it matched. */
 /* Preprocessor lines: the printed lexeme is a fixed word ("include" or "define"), not yytext. */
#include<.*> printf("%d\t<%s,%s>\n",currentLine,"include","PreProcessor");
#define[^\n]+ printf("%d\t<%s,%s>\n",currentLine,"define","PreProcessor");
= {printf("%d\t<%s,%s>\n",currentLine,yytext,"AssignmentOperator");}
 /* Reserved words. NOTE(review): there is no rule for identifiers or numeric literals anywhere in this section. */
int|short|signed|unsigned|long|double|float|char|void|enum|union|struct|auto|const|register|static|volatile|extern|typedef|if|else|while|do|for|switch|case|continue|break|default|sizeof|goto|return {printf("%d\t<%s,%s>\n",currentLine,yytext,"Keyword");}
 /* Blanks and tabs are discarded; a newline only advances the line counter. */
[\t ] ;
\n currentLine++;
 /* Double-quoted text containing no further quote character; backslash escapes are not handled. */
(\"[^\"]*\") {printf("%d\t<%s,%s>\n",currentLine,yytext,"String Literal");}
 /* Grouping punctuation. */
\( printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisOpen");
\) printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisClose");
\{ printf("%d\t<%s,%s>\n",currentLine,yytext,"blockOpen");
\} printf("%d\t<%s,%s>\n",currentLine,yytext,"blockClose");
 /* NOTE(review): "<=" is reported as ArithmeticOperator, and no rule covers "<", ">" or ">=". */
"+"|"-"|"/"|"*"|"<=" {printf("%d\t<%s,%s>\n",currentLine,yytext,"ArithmeticOperator");}
(\&\&)|(\|\|)|! printf("%d\t<%s,%s>\n",currentLine,yytext,"LogicalOperator");
&|\||~ printf("%d\t<%s,%s>\n",currentLine,yytext,"BitwiseOperator");
 /* NOTE(review): [^\n] matches exactly one character after the two slashes, not the rest of the line. */
\/\/[^\n] printf("%d\t<%s,%s>\n",currentLine,yytext,"SingleLineComment");
 /* '.' never matches a newline, so only a block comment that opens and closes on one line is recognised. */
(\/\*.*\*\/) printf("%d\t<%s,%s>\n",currentLine,yytext,"MultiLineComment");
; printf("%d\t<%s,%s>\n",currentLine,yytext,"Separator");
 /* Catch-all. NOTE(review): flex always takes the longest match and this pattern runs to the end of the line, */
 /* so it beats every rule above unless that rule's match also reaches the end of the line (a tie goes to the earlier rule). */
.* printf("%s\tany match\n",yytext);
%%
/* End-of-input hook called by the generated scanner: a non-zero result means there is no further input to scan. */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * Entry point of the lexer.l scanner.
 *
 * Expects exactly one command-line argument, the path of the file to scan.
 * Returns 1 on a usage error, 2 when the file cannot be opened, and 0 after
 * the whole file has been tokenised.
 */
int main(int argc, char *argv[])
{
    /* Guard clause: the input path is the only accepted argument. */
    if (argc != 2) {
        fputs("Invalid arguments !\n Usage: lexgen <filename>\n", stdout);
        return 1;
    }

    /* The generated scanner reads from yyin. */
    yyin = fopen(argv[1], "r");
    if (!yyin) {
        fputs("File not found !\n", stdout);
        return 2;
    }

    /* Report header, then one line per token printed by the rule actions. */
    fputs("Lexical Analyser for C :-\n", stdout);
    fputs("Line\tToken\n", stdout);

    yylex();
    fclose(yyin);
    return 0;
}
输入文件:
#include<stdio.h>
#define PI 3.14
int a=5;
double
< + - *
<= >= ! ~
"hskldjh";
这是另一个程序 tmp.l,它可以正常工作(它能正确处理 int a=5;,而 lexer.l 却直接忽略了这一行):
%{
/* C prologue for tmp.l: shared state used by the rule actions, st_add() and main(). */
#include<stdio.h>
#include<string.h>
/* err[i]  : message of the i-th recorded lexical error (20 slots of 50 bytes).   */
/* name[i] : i-th distinct identifier stored by st_add() (20 slots of 20 bytes).  */
char err[20][50],name[20][20];
/* lno: current input line (starts at 1); cnt: entries used in name[];            */
/* ecnt: entries used in err[]/elno[]; elno[i]: line number of the i-th error.    */
int lno=1,cnt=0,ecnt=0,elno[20];
/* Adds an identifier to the symbol table; defined after the rules section. */
void st_add(char *);
%}
%%
 /* Rules section of tmp.l: each rule prints "<line> <lexeme> <category>" or records an error in err[]/elno[]. */
 /* Text that no rule matches (for example '<', '>', '!', '~' or an upper-case-initial name) falls through to */
 /* flex's default action, which echoes it unchanged to the output. */
[0-9]+ {printf("%d %s Number\n",lno,yytext);}
 /* NOTE(review): inside the brackets the '+', '-', '/' sequence forms a range from '+' to '/', so ',' and '.' */
 /* are also reported as Operator - presumably unintended; confirm before relying on it. */
[+-/*] {printf("%d %s Operator\n",lno,yytext);}
= {printf("%d %s Assignment\n",lno,yytext);}
 /* Fixed word lists; being listed before the identifier rule lets them win equal-length matches. */
main|return|include|if|else|switch|cin|cout|using|namespace|std {printf("%d %s Keyword\n",lno,yytext);}
int|double|char|float {printf("%d %s Data type\n",lno,yytext);}
 /* Blanks and tabs are discarded; a newline only advances the line counter. */
[\t ] ;
\n {lno++;}
 /* Line comments and closed block comments are dropped silently. */
(\/\/.*) ;
(\/\*[^*/]*\*\/) ;
 /* A block comment opener with no closing sequence is recorded as an error. */
 /* NOTE(review): ecnt is not checked against the 20 slots of err[]/elno[] here or in the two error rules below. */
(\/\*[^*/]*) {elno[ecnt]=lno;char str[100]="Unterminated comment";strcpy(err[ecnt],str);ecnt++;}
printf|scanf {printf("%d %s Library function\n",lno,yytext);}
 /* Identifiers must start with a lower-case letter; each one is also handed to st_add(). */
[a-z]+[a-zA-Z0-9]* {printf("%d %s Identifier\n",lno,yytext);st_add(yytext);}
 /* Header names such as stdio.h: longer than the identifier match, so this rule wins. */
([a-zA-Z0-9]+\.h) {printf("%d %s Header\n",lno,yytext);}
\( {printf("%d %s Open bracket\n",lno,yytext);}
\) {printf("%d %s Close bracket\n",lno,yytext);}
\<< {printf("%d %s insertion\n",lno,yytext);}
\>> {printf("%d %s extraction\n",lno,yytext);}
\{ {printf("%d %s Block start\n",lno,yytext);}
\} {printf("%d %s Block end\n",lno,yytext);}
# {printf("%d %s Preprocessor\n",lno,yytext);}
; {printf("%d %s Terminator\n",lno,yytext);}
 /* Closed string literal; the next rule records a quote that reaches the end of the line unclosed. */
(\"[^\"]*\") {printf("%d %s String literal\n",lno,yytext);}
(\"[^\"\n]*\n) {elno[ecnt]=lno;char str[100]="Unterminated quote";strcpy(err[ecnt],str);ecnt++;lno++;}
 /* Digits immediately followed by letters are recorded as an error. */
 /* NOTE(review): the range A-z also covers the punctuation between 'Z' and 'a'; A-Z was presumably intended. */
[0-9]+[a-zA-z]* {elno[ecnt]=lno;char str[100]="Unrecognized token";strcpy(err[ecnt],str);ecnt++;}
%%
/*
 * Record an identifier in the symbol table `name`, at most once.
 *
 * s: NUL-terminated identifier text (the scanner passes yytext).
 *
 * An identifier longer than one table slot is truncated to fit; the lookup
 * uses the same truncated form, so a long name is still stored only once.
 * When the table is already full the identifier is dropped instead of being
 * written past the end of the array.
 */
void st_add(char s[20])
{
    enum {
        TABLE_CAPACITY = sizeof name / sizeof name[0],
        NAME_CAPACITY = sizeof name[0]
    };
    char key[NAME_CAPACITY];
    int i;

    /* snprintf always NUL-terminates and never writes more than sizeof key bytes. */
    snprintf(key, sizeof key, "%s", s);

    for (i = 0; i < cnt; i++) {
        if (strcmp(name[i], key) == 0) {
            return;
        }
    }

    if (cnt >= TABLE_CAPACITY) {
        return;
    }

    /* key is guaranteed to fit: it has exactly the size of one slot. */
    strcpy(name[cnt], key);
    cnt++;
}
/*
 * Entry point of the tmp.l scanner.
 *
 * Prompts for a file name on standard input, scans that file, then prints
 * the recorded lexical errors followed by the symbol table.
 * Returns 0 on success, 1 when no file name could be read, and 2 when the
 * file cannot be opened.
 */
int main(void)
{
    char file[20];
    int i;

    printf("Enter file name:");
    /* The width limit keeps the name inside file[] (19 characters plus NUL). */
    if (scanf("%19s", file) != 1) {
        fprintf(stderr, "Invalid file name !\n");
        return 1;
    }

    yyin = fopen(file, "r");
    if (yyin == NULL) {
        /* Without this check the generated scanner would fall back to reading stdin. */
        fprintf(stderr, "File not found !\n");
        return 2;
    }

    printf("Line No. Lexeme Token\n");
    yylex();
    fclose(yyin);

    printf("Number of errors: %d\n", ecnt);
    for (i = 0; i < ecnt; i++) {
        printf("Line no.: %2d %s\n", elno[i], err[i]);
    }

    printf("\nSymbol Table\n");
    for (i = 0; i < cnt; i++) {
        printf("%s\n", name[i]);
    }

    return 0;
}
/* Called by the generated scanner at end of file; returning non-zero ends scanning instead of switching to another input. */
int yywrap()
{
    const int finished = 1;

    return finished;
}
tmp.l输出:
Line No. Lexeme Token
1 # Preprocessor
1 include Keyword
<1 stdio.h Header
>2 # Preprocessor
2 define Identifier
PI2 3 Number
2 . Operator
2 14 Number
3 int Data type
3 a Identifier
3 = Assignment
3 5 Number
3 ; Terminator
4 double Data type
<5 + Operator
5 - Operator
5 * Operator
<6 = Assignment
>6 = Assignment
!~7 "hskldjh" String literal
7 ; Terminator
Number of errors: 0
Symbol Table
define
a
lexer.l输出:
Lexical Analyser for C :-
Line Token
1 <include,PreProcessor>
2 <define,PreProcessor>
int a=5; any match
4 <double,Keyword>
< + - * any match
<= >= ! ~ any match
"hskldjh"; any match
它甚至不匹配关系运算符和算术运算符的正则表达式。如果输入只有 int,它会显示 int 是关键字;但如果输入为 int a=5;,这一行就被忽略了,而 tmp.l 却处理得完美无缺!在 flex 中到底应该如何编写规则?
答案 0 :(得分:1)
(F)lex 总是选用匹配最长的那条规则。规则 .*
会一直匹配到行尾,因此它匹配到的文本比其他任何规则都长,除非那个记号本身恰好一直延伸到行尾(长度相同时,写在前面的规则优先)。把这条规则改成只匹配单个字符的 . 并放在所有规则的最后,其他规则才有机会生效。