我想部分自动化大学课程的C代码(ANSI C99)评分。我要计算的一个属性是每个C函数的行数(可选地不包括空白和注释行)。
我知道有几种工具可以过滤掉文件中的空白行和注释行,但这只能解决一半的问题。我想分隔属于单个C函数的行。
有人告诉我正则表达式不起作用。有使用gcc预处理器的聪明方法吗?
答案 0 :(得分:3)
Clang有一个用于打印语法树的开关。
例如,如果我运行
clang -Xclang -ast-dump -fsyntax-only lc.c
来处理下面这个文件
lc.c:
int main()
{
    /* Empty body. The AST dump shown after this listing reports main spanning lines 1-5. */

}
/* The same AST dump reports f spanning lines 7-9, i.e. starting on the next line. */
void f()
{
}
我得到:
...
|-FunctionDecl 0x558d2c812890 <lc.c:1:1, line:5:1> line:1:5 main 'int ()'
| `-CompoundStmt 0x558d2c812970 <line:2:1, line:5:1>
`-FunctionDecl 0x558d2c8129c8 <line:7:1, line:9:1> line:7:6 f 'void ()'
  `-CompoundStmt 0x558d2c812a68 <line:8:1, line:9:1>
如果编写一个脚本,从后面紧跟着 CompoundStmt 的、深度为 1 的 FunctionDecl 中提取起始和结束行号(FunctionDecl + CompoundStmt == 函数定义),然后将两者相减,得到的就是每个函数的行数减 1。
预处理器只是一个词法分析器(tokenizer)。您需要的是一个真正的语法解析器。
答案 1 :(得分:0)
您可以通过2个步骤解决此问题:
(`{`、`}`、`,`、`;`
...)。这将使您的统计结果更少地依赖于程序员所使用的编码风格。下面的代码可以帮助完成第一步:一个剥离注释的解析器:
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Read the next byte from the C source file, handling escaped newlines.
 *
 * A backslash immediately followed by a newline is consumed silently (the
 * line counter is still advanced) and reading continues with the byte after
 * it. A backslash followed by anything else is returned as '\\' and the
 * following byte is pushed back so the next call sees it.
 *
 * fp       - stream to read from
 * lineno_p - line counter, incremented once for every newline consumed,
 *            whether escaped or returned to the caller
 *
 * Returns the byte read, or EOF at end of input.
 */
int getcpp(FILE *fp, int *lineno_p) {
    for (;;) {
        int c = getc(fp);

        if (c != '\\') {
            if (c == '\n')
                ++*lineno_p;
            return c;
        }

        int next = getc(fp);
        if (next != '\n') {
            /* lone backslash: give the following byte back to the stream */
            ungetc(next, fp);
            return '\\';
        }

        /* backslash-newline pair: count the line and keep reading */
        ++*lineno_p;
    }
}
/*
 * Strip C comments from a source file.
 *
 * Usage: prog [input [output]]
 *   argv[1] - input file (defaults to stdin)
 *   argv[2] - output file (defaults to stdout)
 *
 * A single-line comment is replaced by a newline, a block comment by a single
 * space. String and character constants are copied through unchanged so that
 * comment-like sequences inside them are preserved. Input is read through
 * getcpp().
 *
 * Diagnostics for unterminated comments/constants are printed to stderr and
 * stop processing, but do not change the exit status.
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot be
 * written.
 */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    const char *outname = "<stdout>";
    int ch, lineno;
    int write_failed;

    if (argc > 1) {
        filename = argv[1];
        if ((fp = fopen(filename, "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        outname = argv[2];
        if ((ft = fopen(outname, "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    outname, strerror(errno));
            /* do not leave the input stream open on this error path */
            if (fp != stdin)
                fclose(fp);
            return 1;
        }
    }

    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        /* line on which the current token starts, for diagnostics */
        int startline = lineno;

        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft); /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0;
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                putc(' ', ft); /* replace comment with single space */
                continue;
            }
            putc('/', ft);
            if (ch == EOF) {
                /* the '/' was the last byte of the input: stop here instead
                   of falling through and writing EOF as a stray 0xFF byte */
                break;
            }
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte so an escaped quote does not
                       terminate the constant */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    /* getcpp already counted this newline, hence lineno - 1 */
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }

    if (fp != stdin)
        fclose(fp);

    /* report write failures instead of silently producing truncated output */
    write_failed = ferror(ft);
    if (ft != stdout) {
        if (fclose(ft) != 0)
            write_failed = 1;
    } else if (fflush(ft) != 0) {
        write_failed = 1;
    }
    if (write_failed) {
        fprintf(stderr, "Error writing output file %s\n", outname);
        return 1;
    }
    return 0;
}