删除C程序中的所有注释 - 对此代码的任何可能的改进?

时间:2013-10-08 03:06:09

标签: c

我正在从K& R书中学习C,在第一章中学习练习1.23,我必须编写一个程序,删除用户输入的一些C代码的所有注释。到目前为止,这是我完成的程序。我能做些什么改进吗?

/**
 Tuesday, 10/07/2013

 Exercise 1.23
 Write a program to remove all comments from a C 
 program. Don't forget to handle quoted strings 
 and character constants properly. C comments   
 don't nest.
**/

#include <stdio.h>
#define MAX_LENGTH 65536
#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT  2

main()
{
    char code[MAX_LENGTH];        /* Buffer that stores the inputted code */
    int size = 0;                 /* Length of the inputted code */
    int loop;                     /* Integer used for the for loop */
    char c;                       /* Character to input into */
    int status = NOT_IN_COMMENT;  /* Are we in a comment? What type? */
    int in_string = 0;            /* Are we inside of a string constant? */
    char last_character;          /* Value of the last character */


    /* Input all code into the buffer until escape sequence pressed */
    while ((c = getchar()) != EOF)
        code[size++] = c; 
    code[size] = '\0'; 


    /* Remove all comments from the code and display results to user */
    for (loop = 0; loop < size; loop++) {
        char current = code[loop]; 

        if (in_string) {
            if (current == '"') in_string = 0; 
            putchar(current);
        }

        else {
            if (status == NOT_IN_COMMENT) {
                if (current == '"') {
                    putchar(current);
                    in_string = 1; 
                    continue; 
                }

                if (current == '/' && last_character == '/') status = SINGLE_COMMENT;
                else if (current == '*' && last_character == '/') status = MULTI_COMMENT; 
                else if (current != '/' || (current == '/' && loop < size-1 && !(code[loop+1] == '/' || code[loop+1] == '*'))) putchar(current); 
            }

            else if (status == SINGLE_COMMENT) {
                if (current == '\n') {
                    status = NOT_IN_COMMENT; 
                    putchar('\n');
                }
            }

            else if (status == MULTI_COMMENT) {
                if (current == '/' && last_character == '*') status = NOT_IN_COMMENT; 
            }
        }

        last_character = current; 
    }
}

6 个答案:

答案 0 :(得分:4)

将你的注释剥离到一个函数中(更有用),并且一次用fgets()读取一行,last_character是不明确的(它是指最后一个,还是以前的?),这使用了更少的对putchar的调用( ),每行只有一个printf(可以使用puts),保留你正在做的大部分内容,

#include <stdio.h>
#include <string.h>
#define MAX_LENGTH 65536

#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT  2
int status = NOT_IN_COMMENT;  /* Are we in a comment? What type? */
int in_string = 0;            /* Are we inside of a string constant? */
char* stripcomments(char* stripped,char* code)
{
    int ndx;                      /* index for code[] */
    int ondx;                     /* index for output[] */
    char prevch;                  /* Value of the previous character */
    char ch;                      /* Character to input into */

    /* Remove all comments from the code and display results to user */
    for (ndx=ondx=0; ndx < strlen(code); ndx++)
    {
        char current = code[ndx];

        if (in_string) {
            if (current == '"') in_string = 0;
            stripped[ondx++] = current;
        }
        else {
            if (status == NOT_IN_COMMENT) {
                if (current == '"') {
                    stripped[ondx++] = current;
                    in_string = 1;
                    continue;
                }

                if (current == '/' && prevch == '/') status = SINGLE_COMMENT;
                else if (current == '*' && prevch == '/') status = MULTI_COMMENT;
                else if (current != '/' || (current == '/' && ndx < strlen(code)-1 && !(code[ndx+1] == '/' || code[ndx+1] == '*'))) stripped[ondx++] = current;
            }

            else if (status == SINGLE_COMMENT) {
                if (current == '\n') {
                    status = NOT_IN_COMMENT;
                    stripped[ondx++] = '\n';
                }
            }

            else if (status == MULTI_COMMENT) {
                if (current == '/' && prevch == '*') status = NOT_IN_COMMENT;
            }
        }
        prevch = current;
    }
    stripped[ondx] = '\0';
    return(stripped);
}

int main(void)
{
    char code[MAX_LENGTH];        /* Buffer that stores the inputted code */
    char stripped[MAX_LENGTH];

    while( fgets(code,sizeof(code),stdin) )
    {
        //printf("%s\n",code);
        //strip comments...
        stripcomments(stripped,code);
        if( strlen(stripped) > 0 ) printf("%s",stripped);
    }
}

我会留给你删除多余的空白行。

答案 1 :(得分:1)

当您处理引用的字符串时,您应该检测转义引号(\")。例如"\"/* not a comment */\""是一个有效的字符串,但我认为您的代码会从中间删除错误的注释。

如果你想要真正正确,你还应该处理行继续(在下一行继续以\结尾的行)。为了增加毛羽,你也应该处理三角形。 ??/"是一个转义引号,而一行末尾的??/是一个延续。

代码的样式看起来很不错,虽然main应该更恰当地声明为int main(void)

答案 2 :(得分:1)

对我来说很好,做得好!

也许可以通过添加一些评论来改进:)作为一个粗略的指南,为每个条件添加一个。你正在评论,但只是在循环内部的多汁部分停了下来。但代码看起来很可读。

有用吗?你测试过了吗?

如果我的字符串包含转义的双引号,那么看起来可能会失败...例如"He said, \"Hello, World!\""

答案 3 :(得分:0)

//G H PATEL COLLEGE OF ENGINEERING & TECHNOLOGY.    
//c program to remove comments from given src.txt file, and write back to dest.txt file.
#include <stdio.h>
int main()
{
    FILE *src,*dest;
    char ch,pre,line[100];
    int nflag,qflag,index;

    src=fopen("src.txt","r+");
    dest=fopen("dest.txt","w+");

    nflag=1;

    while(!feof(src))
    {
        index=0;
        for(ch=fgetc(src);ch!=EOF && ch!='\n';)
        {
            if(ch=='"'&&pre!='\\')
            {
                qflag=0;
                for(;ch!='\n' && qflag==0;)
                {
                    line[index++]=ch;
                    pre=ch;
                    ch=fgetc(src);
                    if(ch=='"'&&pre!='\\')
                    {
                        qflag=1;
                        line[index++]=ch;
                        pre=ch;
                        ch=fgetc(src);
                        break;
                    }
                }
            }
            else if(ch=='/')
            {
                pre=ch;
                ch=fgetc(src);
                if(ch=='/')
                {
                    for(;fgetc(src)!='\n';);
                    break;
                }
                if(ch=='*')
                {
                    nflag=1;
                    for(ch=fgetc(src);nflag==1;)
                    {
                        if(ch=='*')
                        {
                            pre=ch;
                            ch=fgetc(src);
                            if(ch=='/')
                            {
                                nflag=0;
                            }
                        }
                        else
                        {
                            pre=ch;
                            ch=fgetc(src);
                        }
                    }
                }
            }
            else
            {
                line[index++]=ch;
                pre=ch;
                ch=fgetc(src);
            }
        }
        line[index]='\0';
        if(index>0)
        {
            line[index] = '\0';
            fprintf(dest,"%s\n",line);
            fflush(stdin);
        }
    }
    getch();
    fclose(src);
    fclose(dest);
    return 0;
}

答案 4 :(得分:0)

我喜欢这个帖子包含一个&#34;评论脱衣舞娘&#34;在将它交给JSON解析器之前的项目中。只有我更喜欢FSM方法。希望我的实现对任何人都是可以理解和有用的:

#include <stdio.h>
#include <string.h>

void strip(int ch, FILE *stream)
{
    static enum strip_states {
        STRIP_STATE_PUTC = 0,
        STRIP_STATE_SINGLE,
        STRIP_STATE_MULTI,
        STRIP_STATE_STRING,
    } state = STRIP_STATE_PUTC;
    static char _ch = 0;
    static unsigned char _nestlevel = 0;

    /* String */
    if (state == STRIP_STATE_PUTC &&
        ch == '"') {
        state = STRIP_STATE_STRING;
    } else if (state == STRIP_STATE_STRING &&
        ch == '"') {
        state = STRIP_STATE_PUTC;
    /* Multiline */
    } else if (_ch == '/' && ch == '*') {
        if (state == STRIP_STATE_PUTC)
            state = STRIP_STATE_MULTI;
        else if (state == STRIP_STATE_MULTI)
            _nestlevel++;
    } else if (_ch == '*' && ch == '/') {
        if (state == STRIP_STATE_MULTI &&
            _nestlevel > 0)
            _nestlevel--;
        else if (state == STRIP_STATE_MULTI &&
            _nestlevel == 0)
            state = STRIP_STATE_PUTC;
    /* Singleline */
    } else if (state == STRIP_STATE_PUTC &&
            _ch == '/' && ch == '/') {
        state = STRIP_STATE_SINGLE;
    } else if (state == STRIP_STATE_SINGLE &&
            ch == '\n') {
        state = STRIP_STATE_PUTC;
    }

    /* Put character */
    if ((state == STRIP_STATE_PUTC && ch != '/') ||
        state == STRIP_STATE_STRING)
        fputc(ch, stream);

    _ch = ch;
}

int main(void)
{
    int ch;
    while ((ch = fgetc(stdin)) != EOF)
        strip(ch, stdout);

    return 0;
}

什么有效:

  • 单行评论"xxx // comment"
  • 普通多行评论"xxx /* comment\n another comment */ yyy"
  • 嵌套评论"xxx /* comment /* nested comment */ end of comment */ yyy

目前尚未实施和测试:

  • 单行评论背后的多行评论
  • 续行
  • 转义字符

亲切的问候, 杰里

答案 5 :(得分:0)

您可以参考下面的简单代码:

#include<stdio.h>
void main()
{
printf("Hello");
/*--------------------------------------------
  ------------------Ignored by compiler-------
  --------------------------------------------
*/
printf("By");
}

<强> INPUT:

#include<stdio.h>
void main()
{
printf("Hello");
printf("By");
}

<强>输出:

{{1}}