file uncomment.c：

Question

我设法编写了代码来跳过C源代码中的//注释：

// Read the input one line at a time; each fgets call stores at most 9999 characters.
while (fgets(string, 10000, fin) != NULL)
{
    unsigned int i;
    // Scan the line for the two-character sequence that starts a // comment.
    for (i = 0; i < strlen(string); i++)
    {
        if ((string[i] == '/') && (string[i + 1] == '/'))
        {
            // Advance i to the newline that ends the line, skipping the comment text.
            // NOTE(review): if the buffer holds no '\n' (over-long or final line),
            // this scan does not stop at the terminating NUL -- confirm.
            while (string[i += 1] != '\n')
                continue;
        } 
    //rest of the code...

我试图对/**/评论做类似的事情：

// A '/' followed by '*' is taken as the start of a block comment.
if ((string[i] == '/') && (string[i + 1] == '*'))
{
    // Move i forward to the next '/' character in the current buffer.
    while (string[i += 1] != '/')
        continue;
}

// A '*' followed by '/' is taken as the end of a block comment.
if ((string[i] == '*') && (string[i + 1] == '/'))
{
    // Step i backwards until string[i] is a zero byte.
    // NOTE(review): nothing here stops i at the start of the buffer -- confirm.
    while (string[i -= 1])
        continue;
}

但它逐行读取，如果我有，例如，

/*

text*/

然后它计算文本。

我该如何解决这个问题？

Answer 1

即使您所谓的工作代码也有几个问题：

它无法识别任何上下文，因此会将出现在字符串常量或/* ... */注释中的//视为评论的开头。
万一你碰巧有很长的行，它们会被截断（包括它们的终止换行符）。

最后，C是一种面向流的语言，而不是面向行的语言。它应该以这种方式解析（逐个字符）。要正确地完成工作，您确实需要实现更复杂的解析器。如果您正在学习一种新工具，那么您可以考虑将您的程序基于Flex词法分析器。

Answer 2

C评论的简单正则表达式是：

/\*([^\*]|\*[^\/])*\*\//

（对不起转义字符）这允许除*/之外的注释中的任何序列。它转换为以下DFA（四种状态）：

状态0，输入/，下一个状态1，输出无
状态0，输入其他，下一状态0，输出读取字符
状态1，输入*，下一个状态2，无输出
状态1，输入/，下一个状态1，输出/
状态1，输入其他，下一个状态0，输出/和读取字符
状态2，输入*，下一个状态3，输出无
状态2，输入其他，下一个状态2，输出无
状态3，输入/，下一个状态0，输出无
状态3，输入*，下一个状态3，输出无
状态3，输入其他，下一个状态2，输出无

可能的输入是/，*和任何其他字符。可能的输出是输出读取字符，输出/ 和输出* 。

这转换为以下代码：

file uncomment.c：

#include <stdio.h>

/*
 * Copies standard input to standard output with every C block comment
 * removed, using a four-state machine:
 *   0 - ordinary text
 *   1 - a '/' has been read and is being held back
 *   2 - inside a block comment
 *   3 - inside a block comment, the last character was '*'
 * String literals and character constants are not recognised, so a
 * comment opener inside them is still treated as a comment.
 * Returns 0.
 */
int main(void)
{
    int c, st = 0;
    while ((c = getchar()) != EOF) {
        switch (st) {
        case 0: /* ordinary text */
            switch (c) {
            case '/': st = 1; break; /* hold the slash until we know more */
            default: putchar(c); break;
            } /* switch */
            break;
        case 1: /* a '/' is pending */
            switch (c) {
            case '/': putchar('/'); break; /* emit the older slash, keep the new one pending */
            case '*': st = 2; break; /* comment opener: drop the pending slash */
            default: putchar('/'); putchar(c); st = 0; break;
            } /* switch */
            break;
        case 2: /* inside a block comment */
            switch (c) {
            case '*': st = 3; break; /* possible start of the closer */
            default: break;
            } /* switch */
            break;
        case 3: /* inside a block comment, just after '*' */
            switch (c) {
            case '/': st = 0; break; /* comment closed */
            case '*': break; /* still a run of stars */
            default: st = 2; break;
            } /* switch */
            break;
        default:
            break;
        } /* switch */
    } /* while */

    /* Input ended while a '/' was still held back in state 1: it was
     * ordinary text, so emit it instead of losing it. */
    if (st == 1)
        putchar('/');

    return 0;
} /* main */

如果您要排除这两种类型的注释，我们需要在收到第二个/时切换到第五个状态，从而生成以下代码：

file uncomment2.c：

#include <stdio.h>

/*
 * Copies standard input to standard output with both block comments and
 * line comments removed, using a five-state machine:
 *   0 - ordinary text
 *   1 - a '/' has been read and is being held back
 *   2 - inside a block comment
 *   3 - inside a block comment, the last character was '*'
 *   4 - inside a line comment
 * The newline that ends a line comment is kept.  String literals and
 * character constants are not recognised, so comment openers inside
 * them are still treated as comments.
 * Returns 0.
 */
int main(void)
{
    int c, st = 0;
    while ((c = getchar()) != EOF) {
        switch (st) {
        case 0: /* ordinary text */
            switch (c) {
            case '/': st = 1; break; /* hold the slash until we know more */
            default: putchar(c); break;
            } /* switch */
            break;
        case 1: /* a '/' is pending */
            switch (c) {
            case '/': st = 4; break; /* line comment: drop the pending slash */
            case '*': st = 2; break; /* block comment: drop the pending slash */
            default: putchar('/'); putchar(c); st = 0; break;
            } /* switch */
            break;
        case 2: /* inside a block comment */
            switch (c) {
            case '*': st = 3; break; /* possible start of the closer */
            default: break;
            } /* switch */
            break;
        case 3: /* inside a block comment, just after '*' */
            switch (c) {
            case '/': st = 0; break; /* comment closed */
            case '*': break; /* still a run of stars */
            default: st = 2; break;
            } /* switch */
            break;
        // in the next line we put // inside an `old' comment
        // to illustrate this special case.  The switch has been put
        // after the comment to show it is not being commented out.
        case 4: /* we have read "// ..." */ switch (c) {
            case '\n': st = 0; putchar('\n'); break; /* the newline itself is kept */
            default: break; /* comment text is dropped */
            } // switch  (to illustrate this kind of comment).
            break;
        default:
            break;
        } /* switch */
    } /* while */

    /* Input ended while a '/' was still held back in state 1: it was
     * ordinary text, so emit it instead of losing it. */
    if (st == 1)
        putchar('/');

    return 0;
} /* main */

Answer 3

这个简单的代码可以忽略注释/* */（没有处理所有案例，例如在c代码中变量的引号之间的字符串内写/ *）

#include <stdio.h> 
#include <string.h> 

/* Minimal boolean type: false has the value 0 and true has the value 1. */
typedef enum bool {
    false = 0,
    true = 1
} bool;
/*
 * Prints the file named "file" to standard output with block comments
 * removed.
 *
 * The input is processed in chunks of at most 1000 characters (one
 * fgets call each).  A comment marker split across two chunks, which can
 * only happen on lines longer than the buffer, is not recognised.
 * String literals are not recognised either, so a comment opener inside
 * a string is still treated as a comment.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(int argc, char *argv[])
{
     FILE *file = fopen("file", "r"); // the input is always the file named "file"
     bool comment = false;            // true while inside a block comment
     char str[1001];                  // holds one chunk of the file at a time

     (void)argc; // the command line is not used
     (void)argv;

     if (file == NULL)
     {
         perror("file"); // report why the input could not be opened
         return 1;
     }

     while (fgets(str, sizeof(str), file) != NULL)
     {
         // Walk to the terminating NUL; str[i + 1] is therefore always
         // inside the string (at worst it is the NUL itself).
         for (size_t i = 0; str[i] != '\0'; i++)
         {
             if (!comment && str[i] == '/' && str[i + 1] == '*')
             {
                 comment = true; // ignore everything until the closing marker
                 i++;            // skip the * so that "/*/" does not close at once
             }
             else if (comment && str[i] == '*' && str[i + 1] == '/')
             {
                 comment = false;
                 i++; // skip the / character
             }
             else if (!comment)
             {
                 putchar(str[i]); // outside any comment: copy through unchanged
             }
         }
     }
     fclose(file);

     return 0;
}

Answer 4

（目前还不是很清楚你的程序正在尝试做什么。）

使用flex计算评论之外的字符数：

/*
 * Counts the characters of standard input that lie outside C and C++
 * comments and prints the total when the end of input is reached.
 */
%option noyywrap

%%
   /* Running total of characters outside comments; local to yylex(). */
   int i = 0;

\"([^\\"]|\\.)*\"            { i += yyleng ; }       // strings: comment markers inside them are ordinary text
\'([^\\'\n]|\\.)*\'          { i += yyleng ; }       // character constants, so that '"' does not open a string
\/\/.*                       {               }       // C++ comments
\/\*([^*]|\*+[^*/])*\*+\/    {               }       // C  comments, including ones closed by a run of stars
.|\n                         { i += yyleng ; }       // normal chars

<<EOF>>                      { printf("%d\n",i); return 0;}  // yylex() returns int; 0 signals end of input
%%

int main(){ 
  yylex(); 
  return 0;}

和

$ flex count-non-com.fl
$ cc -o count-non-com lex.yy.c
$ count-non-com < input

最后一个例子：flex代码删除评论（感谢@LuisColorado）

/*
 * Copies standard input to standard output with C and C++ comments
 * removed; everything else is echoed unchanged.
 */
%option noyywrap 
%%

\"([^\\"]|\\.)*\"            { ECHO; }       // strings: comment markers inside them are ordinary text
\'([^\\'\n]|\\.)*\'          { ECHO; }       // character constants, so that '"' does not open a string
\/\/.*                       {       }       // C++ comments
\/\*([^*]|\*+[^*/])*\*+\/    {       }       // C  comments, including ones closed by a run of stars
.|\n                         { ECHO; }       // normal chars

%%

int main(){ 
  yylex(); 
  return 0;}

Answer 5

创建一个int变量。如果你得到/ *，扫描字符并存储索引。继续扫描，直到获得* /。如果此时变量！= 0，则假设这是结束注释标记并忽略其间的字符。

Answer 6

正如user279599所说，使用一个整数变量作为标志。每当连续得到'/'和'*'时，设置标志（标志= 1），然后标志值保持为1，直到连续得到'*'和'/'。当标志为1时忽略每个字符。

读取C源文件并跳过/ ** /注释

6 个答案:

file uncomment.c：

file uncomment2.c：