需要帮助从c文件中提取注释

时间:2017-11-30 02:46:25

标签: c unix

我只是想了解如何在Unix命令行下用C语言从输入文本文件中提取注释,并将它们写入输出文件。我不需要代码,请给我一些指点。下面是我想要的效果。

输入:

If the input file input_0.txt contains 
/* This is a single-line C comment */ 
#include <stdio.h>
/******  
* This is a nicely formatted  
* multi-line comment.  
******/ 
int main(int argc, char **argv) 
{   
  // This is a C++ comment. 
} 

输出:

Then the execution of the program would be as follows. 
$ ./Comments < input_0.txt 
This is a single-line C comment 
This is a nicely formatted 
multi-line comment. 
This is a C++ comment.

这是我在尊敬的@David C. Rankin的代码基础上修改得到的代码。

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAXC 1024

/*
 * Print to stdout every line of `in` that holds (part of) a comment.
 *
 * Lines are read in chunks of at most MAXC-1 characters. Three flags carry
 * state between chunks:
 *   longline - the chunk just read did not end in '\n' (the line continues)
 *   insingle - a // comment was found on a line that is still being read
 *   inmulti  - a block comment was opened and its closing delimiter not seen
 *
 * Known limits: delimiters inside string literals are treated as comments,
 * and a delimiter split across two chunks is not recognized.
 */
static void print_comment_lines (FILE *in)
{
    int inmulti = 0,
        insingle = 0,
        longline = 0;
    char buf[MAXC] = "";

    while (fgets (buf, MAXC, in)) {
        size_t len = strlen (buf);
        char *p;

        /* refresh on EVERY chunk so stale state can never leak forward */
        longline = len && buf[len-1] != '\n';

        if (insingle) {             /* rest of an over-long // comment */
            fputs (buf, stdout);
            insingle = longline;    /* comment ends where the line ends */
            continue;
        }

        if (inmulti) {              /* inside a block comment */
            if (strstr (buf, "*/"))
                inmulti = 0;
            fputs (buf, stdout);
            continue;
        }

        /* examine every '/', not just the first (e.g. "a / b; // c") */
        for (p = strchr (buf, '/'); p; p = strchr (p + 1, '/')) {
            if (p[1] == '/') {              /* single-line comment */
                insingle = longline;
                break;
            }
            if (p[1] == '*') {              /* block comment start */
                inmulti = !strstr (p + 2, "*/");
                break;
            }
            if (p > buf && p[-1] == '*')    /* stray end of block comment */
                break;
        }
        if (p)
            fputs (buf, stdout);
    }
}

/*
 * Copy `in` to `out`, leaving out the text of // and block comments.
 *
 * The newline that terminates a // comment is kept so the following line is
 * not glued onto the preceding code. Nothing is written in place of a block
 * comment. String literals are not recognized (same limit as above).
 */
static void strip_comments (FILE *in, FILE *out)
{
    enum { CODE, SLASH, LINE, BLOCK } state = CODE;
    int ch, prev = 0;   /* prev: previous char seen inside a block comment */

    while ((ch = fgetc (in)) != EOF) {
        switch (state) {
            case CODE:
                if (ch == '/')
                    state = SLASH;          /* hold back: may open a comment */
                else
                    fputc (ch, out);
                break;
            case SLASH:
                if (ch == '/')
                    state = LINE;
                else if (ch == '*') {
                    state = BLOCK;
                    prev = 0;               /* so that the sequence slash-star-slash does not close */
                }
                else {                      /* it was an ordinary '/' */
                    fputc ('/', out);
                    fputc (ch, out);
                    state = CODE;
                }
                break;
            case LINE:
                if (ch == '\n') {
                    fputc (ch, out);
                    state = CODE;
                }
                break;
            case BLOCK:
                if (ch == '/' && prev == '*')
                    state = CODE;
                prev = ch;
                break;
        }
    }
    if (state == SLASH)     /* input ended right after a lone '/' */
        fputc ('/', out);
}

/*
 * Reads argv[1] (or stdin when no argument is given), prints the lines that
 * contain comments to stdout, then writes a comment-free copy of the input
 * to "temp.txt".
 *
 * Returns 0 on success, 1 if a file cannot be opened, if the input cannot be
 * re-read for the second pass (e.g. stdin is a pipe), or if writing fails.
 */
int main (int argc, char **argv) {

    const char temp[] = "temp.txt";
    int status = 0;
    FILE *fp2;
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    /* validate file open for reading */
    if (!fp) {
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* open the temporary file in write mode */
    fp2 = fopen (temp, "w");
    if (!fp2) {
        fprintf (stderr, "Unable to open temporary file!!\n");
        if (fp != stdin) fclose (fp);
        return 1;
    }

    /* pass 1: show the comments */
    print_comment_lines (fp);

    /* pass 2: needs the input from the start again; a pipe cannot seek */
    if (fseek (fp, 0L, SEEK_SET) == 0)
        strip_comments (fp, fp2);
    else {
        fprintf (stderr, "error: input is not seekable, '%s' left empty.\n",
                 temp);
        status = 1;
    }

    if (fclose (fp2) != 0) {        /* fclose flushes: catch write errors */
        fprintf (stderr, "error: writing '%s' failed.\n", temp);
        status = 1;
    }
    if (fp != stdin) fclose (fp);   /* close file if not stdin (once only) */

    return status;
}

2 个答案:

答案 0 :(得分:2)

注意,要正确执行此操作,还需要检查更多条件(例如 "//"、"/*" 和 "*/" 作为路径的一部分出现,或者出现在字符串中)。使用正则表达式也是解决此问题的另一种方法。

如果我理解正确,并且您希望使用基本C解析源文件的注释行,那么以下是读取文件中所有行的快速示例(作为第一个参数提供,或者在stdin)并寻找单行或多行注释分隔符。

这并不完整,没有涵盖所有边界情况,也没有处理分隔符出现在字面量、宏定义等中的情况。不过,代码中已用注释标出了需要添加额外代码来处理这些问题的位置。

基本方法是以MAXC(1024字节)为块读取一行,并跟踪3个标志。longline表示该行超过MAXC个字符,当前读到的是该行的第二个(或第3个、第4个……)缓冲区。inmulti跟踪当前是否处于多行注释中。最后,insingle表示当前处于单行注释中,而该注释可能超过MAXC个字符。读取循环根据这些标志的状态进行检查和处理,同时查找多行注释的结尾(如果正处于多行注释内)。该代码还会检查多行注释的开始和结束是否都出现在同一行内。

鉴于这些资格,您可以从以下内容开始:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAXC 1024

/*
 * Print to stdout every line of a C source file that holds (part of) a
 * comment. Input is argv[1] when given, otherwise stdin.
 *
 * Lines are read in chunks of at most MAXC-1 characters, and whole chunks are
 * printed (delimiters included). Returns 1 if the file cannot be opened,
 * 0 otherwise.
 *
 * Known limits: delimiters inside string literals or paths are treated as
 * comments, a delimiter split across two chunks is not recognized, and a
 * second comment that starts after a closed block comment in the same chunk
 * is not tracked separately.
 */
int main (int argc, char **argv) {

    int inmulti = 0,    /* block comment opened, closing delimiter not yet seen */
        insingle = 0,   /* // comment on a line that is still being read */
        longline = 0;   /* last chunk had no '\n': the line continues */
    char buf[MAXC] = "";
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    while (fgets (buf, MAXC, fp)) {     /* read upto MAXC into buf */
        char *p;
        size_t len = strlen (buf);

        /* refresh on EVERY chunk; skipping this on the early-continue
         * paths left the flags stuck once a long comment line was seen */
        longline = len && buf[len-1] != '\n';

        if (insingle) {                 /* rest of an over-long // comment */
            printf ("%s", buf);
            insingle = longline;        /* comment ends where the line ends */
            continue;
        }

        if (inmulti) {                  /* are we in a multi-line comment? */
                            /* (note: you need to check if quoted here) */
            if (strstr (buf, "*/"))     /* does buf contain ending? */
                inmulti = 0;
            printf ("%s", buf);         /* print the line */
            continue;       /* (note: end can be before end of line) */
        }

        /* look at every '/', not only the first ("a / b; // c") */
        for (p = strchr (buf, '/'); p; p = strchr (p + 1, '/')) {
            if (p[1] == '/') {          /* start of single line comment */
                            /* note: must make sure not part of path here */
                insingle = longline;    /* only carries over on a long line */
                break;
            }
            if (p[1] == '*') {          /* start of multiline comment */
                inmulti = !strstr (p + 2, "*/");    /* open unless closed here */
                break;
            }
            if (p > buf && p[-1] == '*')    /* stray end of multi-line comment */
                break;
        }
        if (p)                          /* a comment delimiter was found */
            printf ("%s", buf);         /* note: can print from p for comment only */
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}

示例输入文件

$ cat dat/comments.txt
/* This is a single-line C comment */
#include <stdio.h>
/******
* This is a nicely formatted
* multi-line comment.
******/
int main(int argc, char **argv)
{
// This is a C++ comment.
}

示例使用/输出

$ ./bin/comments <dat/comments.txt
/* This is a single-line C comment */
/******
* This is a nicely formatted
* multi-line comment.
******/
  // This is a C++ comment.

注意:这类练习的价值在于学习如何在一长串字符中识别特定的字符,以及如何在遍历文件时处理各种标志和程序状态。

逐字逐句阅读

要从面向行的方法切换到面向字符的方法(并添加chux评论中提到的几个状态),需要先读取并保存第一个字符,然后读取文件中的其余字符。这样就可以将前一个字符与当前字符进行比较,从而判断当前是处于单行注释、多行注释还是引号之内。

同样,这并不能覆盖所有边界情况,但输出已更新为不打印注释的起始和结束分隔符。(您可以根据自己的喜好,调整多行注释中 * 的打印方式。)

从使用fgets阅读更改为fgetc,您可以执行类似以下操作:

#include <stdio.h>

/*
 * Print the text of every comment in a C source file, one output line per
 * input line that contributed comment text. Input is argv[1] when given,
 * otherwise stdin.
 *
 * The file is read one character at a time; `prev` holds the previous
 * character so two-character delimiters can be recognized. Delimiters are
 * not printed, and neither are '*' or '/' characters inside a comment (this
 * drops decoration such as leading " * "; adjust to taste).
 *
 * Quote state is tracked only outside comments, and comment delimiters are
 * ignored inside quotes, so an apostrophe in comment text or a "//" in a
 * string literal does not confuse the scanner.
 *
 * Returns 1 if the input cannot be opened or is empty, 0 otherwise.
 */
int main (int argc, char **argv) {

    int inmulti = 0,    /* in multi-line comment flag */
        insingle = 0,   /* in single-line comment flag */
        insquo = 0,     /* within single-quotes */
        indquo = 0,     /* within double-quotes */
        pending = 0,    /* comment text written, newline still owed */
        c, prev = 0;
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    if ((prev = fgetc(fp)) == EOF) {    /* read 1st char */
        if (fp != stdin) fclose (fp);   /* don't leak the stream */
        return 1;
    }
    if (prev == '\'')                   /* 1st char never reaches the loop, */
        insquo = 1;                     /* so account for a leading quote   */
    else if (prev == '"')
        indquo = 1;

    while ((c = fgetc(fp)) != EOF) {    /* read remaining */
        if (c == '\n') {
            if (pending) {              /* end the output line, if any text */
                putchar (c);
                pending = 0;
            }
            insingle = 0;               /* reset only AFTER using the state */
        }
        else if (insingle || inmulti) { /* inside a comment */
            if (c == '/') {
                if (inmulti && prev == '*') {
                    inmulti = 0;
                    c = 0;              /* delimiter used up: can't start // */
                }
            }
            else if (c != '*') {
                putchar (c);
                pending = 1;
            }
        }
        else if (insquo || indquo) {    /* inside a literal */
            if (prev == '\\')
                c = 0;                  /* escaped char: neither closes the
                                           literal nor escapes the next one */
            else if (insquo && c == '\'')
                insquo = 0;
            else if (indquo && c == '"')
                indquo = 0;
        }
        else {                          /* plain code */
            switch (c) {
                case '/':
                    if (prev == '/') {
                        insingle = 1;
                        c = 0;          /* delimiter used up */
                    }
                    break;
                case '*':
                    if (prev == '/') {
                        inmulti = 1;
                        c = 0;          /* so that the sequence slash-star-slash does not close */
                    }
                    break;
                case '\'':
                    insquo = 1;
                    break;
                case '"':
                    indquo = 1;
                    break;
                default:
                    break;
            }
        }
        prev = c;
    }
    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    if (pending)
        putchar ('\n');     /* tidy up with newline */

    return 0;
}

示例使用/输出

$ ./bin/commentsfgetc <dat/comments.txt
 This is a single-line C comment
 This is a nicely formatted
 multi-line comment.
 This is a C++ comment.

仔细看看,如果您对如何识别字符或控制代码以查找注释块的开头和结尾有疑问,请告诉我。

答案 1 :(得分:1)

您可以使用此shell脚本执行此操作,并保存到文件comments.txt

# Print each line that contains // or lies within /* ... */ exactly once.
awk '/\/\*/ {aux=1} aux || /\/\// {print} /\*\// {aux=0}' generic.c > comments.txt

祝你好运