c - 删除注释(/* 和 //)和字符串

时间:2013-04-19 12:58:29

标签: c

谁能告诉我以下代码是否有错误?这段代码本应从输入中删除注释和字符串(但不是注释,这就是它识别注释的原因)。这与我之前的问题有关:Removing comments with a sliding window without nested while loops

#include <stdio.h>

// Reads stdin one byte at a time through a two-byte window (c1 = previous byte,
// c2 = current byte) and drives a small numeric state machine:
//   0 = ordinary code, 1 = inside a block comment, 2 = inside a string literal,
//   3 = inside a line comment, 4 = inside a character literal.
// Bytes are written to stdout only while the state is 1, 2 or 3; nothing is
// written in states 0 and 4.
// NOTE(review): the intended output (strip vs. extract comments/strings) is not
// clear from the code alone - confirm before changing the logic.
int main()
{
    int c, c1 = 0, c2 = 0 ,state = 0, next = 0;
    while(1)
    {
        // Compute the follow-up state from the window as it stood BEFORE the
        // byte read further down, i.e. from the previous iteration's c1/c2.
        switch(state)
        {
           // From code: "/*" opens state 1, '"' opens state 2, "//" opens state 3, '\'' opens state 4.
           case 0: next = ((c2 == '*' && c1 == '/') ? 1 : (c2 == '\"') ? 2 : (c2 == '/' && c1 == '/') ? 3 : (c2 == '\'') ? 4: 0); break; 
           // Block comment ends when the window holds '*' followed by '/'.
           case 1: next = ((c2 == '/' && c1 == '*') ? 0 : 1); break; 
           // String ends on a '"' that is not directly preceded by a backslash.
           case 2: next = ((c2 == '\"' && c1 != '\\') ? 0 : 2); break;
           // Line comment ends at a newline.
           case 3: next = ((c2 == '\n') ? 0 : 3); break;
           // Character literal ends on a '\'' that is not directly preceded by a backslash.
           case 4: next = ((c2 == '\'' && c1 != '\\') ? 0 : 4); break;
           default: next = state; 
        }
        // getchar() returns a negative value (EOF) at end of input; stop then.
        c = getchar(); if( c < 0) break;
        c1 = c2; c2 = c; // slide window
        if(state == 1)
        {
            if(c2 == '*')
            {
                // Look one byte ahead to see whether this '*' starts the comment terminator.
                // NOTE(review): this getchar() result is not checked for EOF before use.
                c = getchar();
                c1 = c2; c2 = c;
                // Not a terminator: emit the held '*' (c1). The look-ahead byte c2
                // is not written in this iteration.
                if(c2 != '/')
                   putchar(c1);
            }
            else
                putchar(c2);
        }
        else if(state == 2)
        {
            // Writes every byte except a '"' whose previous byte is a backslash.
            if(c2 != '"' || (c2 == '\"' && c1 != '\\'))
                putchar(c2);
        }
        else if(state == 3)
        {
                putchar(c2);
        }
        // NOTE(review): this else belongs to the if/else-if chain above, so the
        // assignment only runs when state is 0 or 4. Once state is 1, 2 or 3 it is
        // never updated again and the corresponding "next" values are unused.
        else
        state = next;
        // c2 is the current input byte and c1 is the previous input byte
    }
    return 0;
}

2 个答案:

答案 0 :(得分:1)

我认为您实际上不需要滑动窗口来完成删除 C 和 C++ 注释的任务。您可以扩展状态机,加入一些额外的状态来跟踪转义等情况。状态多了,代码会变长,但在概念上可能更简单,因为只需要跟踪一个状态。按照您代码的思路改写成我建议的状态机形式,就得到下面的代码(我也赞同 Basile 使用枚举的建议,并已在代码中采用)。

#include <stdio.h>

// Copies stdin to stdout with C and C++ comments removed.
//
// A single-character state machine is used; no look-behind window is needed:
//   START                - ordinary code
//   SLASH                - a '/' has been read and is being withheld until the
//                          next character shows whether it opens a comment
//   STRING / CHAR        - inside a "..." or '...' literal (copied verbatim)
//   STRING_ESCAPE /
//   CHAR_ESCAPE          - the character after a backslash inside a literal
//   SINGLE_LINE_COMMENT  - inside a // comment (discarded up to the newline)
//   MULTI_LINE_COMMENT   - inside a block comment (discarded)
//   MULTI_LINE_END       - a '*' was read inside a block comment
//
// Limitations: block comments are removed without inserting a replacement
// space, backslash-newline continuation of // comments is not recognised, and
// an unterminated comment at end of input is silently discarded.
//
// Returns 0 always.
int main(void)
{
    enum {
        START, SLASH,
        STRING, CHAR, STRING_ESCAPE, CHAR_ESCAPE,
        SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT, MULTI_LINE_END
    } state = START;
    int c;

    while ((c = getchar()) != EOF) {
        switch (state) {
        case START:
        state_START:
            // Withhold '/' until the following character is known.
            if (c == '/') { state = SLASH; break; }
            putchar(c);
            if (c == '\"') state = STRING;
            else if (c == '\'') state = CHAR;
            break;
        case SLASH:
            if (c == '/') state = SINGLE_LINE_COMMENT;
            else if (c == '*') state = MULTI_LINE_COMMENT;
            else {
                // The withheld '/' was an ordinary character (e.g. division):
                // emit it now, then handle the current character as code.
                putchar('/');
                state = START;
                goto state_START;
            }
            break;
        case STRING:
            putchar(c);
            if (c == '"') state = START;
            else if (c == '\\') state = STRING_ESCAPE;
            break;
        case CHAR:
            putchar(c);
            if (c == '\'') state = START;
            else if (c == '\\') state = CHAR_ESCAPE;
            break;
        case SINGLE_LINE_COMMENT:
            if (c == '\n') {
                // The newline terminates the comment but is not part of it;
                // keep it so the following line is not joined to this one.
                putchar(c);
                state = START;
            }
            break;
        case MULTI_LINE_COMMENT:
        state_MULTI_LINE_COMMENT:
            if (c == '*') state = MULTI_LINE_END;
            break;
        case STRING_ESCAPE:
            // Escaped character: copy it and never treat it as a terminator.
            putchar(c);
            state = STRING;
            break;
        case CHAR_ESCAPE:
            putchar(c);
            state = CHAR;
            break;
        case MULTI_LINE_END:
            if (c == '/') state = START;
            else {
                // Not the terminator; re-examine this character so that a run
                // of several '*' before the closing '/' is still recognised.
                state = MULTI_LINE_COMMENT;
                goto state_MULTI_LINE_COMMENT;
            }
            break;
        }
    }

    // Input ended right after a '/' that never became a comment: flush it.
    if (state == SLASH)
        putchar('/');

    return 0;
}

答案 1 :(得分:0)

只是在不知道你的目的的情况下给你一个建议......你是否考虑过正则表达式来解决你的问题?假设您理解正则表达式,它可能会更快,您的代码也会更清晰。

顺便说一句,我为你的问题找到了一个不错的网站......它解释了如何用正则表达式从代码中提取这些注释......

How get comments with regex

这里是一个 C 语言的正则表达式库。