一种更优雅的解析方式

时间:2018-05-17 00:00:17

标签: c parsing file-io system-calls

我是C的新手。 我需要编写一个小函数来打开一个包含3行的配置文件,每行包含一个我需要提取的文件/目录的路径。

我写了这个程序,它似乎有效:

void readCMDFile(char* cmdFile,char directoryPath[INPUT_SIZE], char inputFilePath[INPUT_SIZE],char outputFilePath [INPUT_SIZE]) {
  //open files
  int file = open(cmdFile, O_RDONLY);
  if (file < 0) {
    handleFailure();
  }
  char buffer[BUFF_SIZE];
  int status;
  int count;
  while((count=read(file,buffer,sizeof(buffer)))>0)
  {
    int updateParam = UPDATE1;
    int i,j;
    i=0;
    j=0;
    for (;i<count;i++) {
      if (buffer[i]!='\n'&&buffer[i]!=SPACE&&buffer[i]!='\0') {
        switch (updateParam){
          case UPDATE1:
            directoryPath[j] = buffer[i];
            break;
          case UPDATE2:
            inputFilePath[j] = buffer[i];
            break;
          case UPDATE3:
            outputFilePath[j] = buffer[i];
            break;
        }
        j++;

      } else{
        switch (updateParam){
          case UPDATE1:
            updateParam = UPDATE2;
            j=0;
            break;
          case UPDATE2:
            updateParam = UPDATE3;
            j=0;
            break;

        }

      }
    }
  }
  if (count < 0) {
    handleFailure();
  }

}

但它非常不直观且非常难看,所以我认为必须有一种更优雅的方式来做到这一点。有什么建议吗? 谢谢!

更新:配置文件内容将如下所示:

/home/bla/dirname
/home/bla/bla/file1.txt
/home/bla/bla/file2.txt

4 个答案:

答案 0 :(得分:1)

你的问题其实不是关于解析文件内容的问题,而只是关于如何在函数内把文件的各行读入合适的存储,并让保存这些行的对象能够返回给调用函数。这是相当标准的做法,但您有很多方法可以处理它。

最大的考虑因素是事先不知道要读取的行的长度。你说现在有 3 行需要读取,但其实没有必要事先知道有多少行(事先知道行数可以让你避免调用 realloc,但这是唯一的节省)。

您希望创建一个健壮而灵活的方法来读取行,并以恰好分配足够内存来保存所读内容的方式存储它们。一个好的方法是声明一个固定大小的临时缓冲区,用 fgets 保存从文件中读取的每一行,然后对缓冲区调用 strlen 以确定所需的字符数(同时去除 fgets 保留的尾随换行符)。由于您正在读取路径信息,因此可以使用预定义的宏 PATH_MAX 来确定临时缓冲区的大小,以确保它可以容纳系统允许的最长路径。您也可以使用 POSIX getline 代替 fgets,但我们现在将坚持使用 C 标准库。

允许您在函数中为多行分配存储、并返回一个可在调用函数中使用的单一指针的基本类型是 char **(指向 char 指针的指针,或者宽松地说,动态指针数组)。方案很简单:先分配一定数量的初始指针(在你的情况下为 3 个),然后循环遍历文件,一次读一行,获得该行的长度,再分配 length + 1 个字符的存储来保存该行。例如,如果您使用以下代码分配 3 个指针:

#define NPATHS 3
...
/* Abbreviated excerpt ('...' marks code omitted from this listing).
 * Reads at most NPATHS lines from fp, stores each one in its own malloc'd
 * string and returns the calloc'd array of those pointers; returns NULL
 * when an allocation fails.
 * NOTE(review): the visible failure path for malloc returns NULL without
 * releasing 'paths' or the lines stored so far.
 */
char **readcmdfile (FILE *fp, size_t *n)
{
    ...
    char buf[PATH_MAX] = "";    /* temp buffer to hold one line */
    char **paths = NULL;        /* pointer to pointer to char to return */
    size_t idx = 0;             /* index of the next free slot in paths */
    ...
    paths = calloc (NPATHS, sizeof *paths); /* allocate NPATHS zeroed pointers */
    if (!paths) {                   /* validate allocation/handle error */
        perror ("calloc-paths");
        return NULL;
    }
    ...
    while (idx < NPATHS && fgets (buf, sizeof buf, fp)) {
        size_t len = strlen (buf);          /* get length of string in buf */
        ...
        paths[idx] = malloc (len + 1);      /* storage for line plus '\0' */
        if (!paths[idx]) {                  /* validate allocation */
            perror ("malloc-paths[idx]");   /* handle error */
            return NULL;
        }
        strcpy (paths[idx++], buf);     /* copy buf into the new slot, then advance idx */
        ...
    return paths;   /* return paths */
}

注意:如果您在为每个字符串分配存储之前先检查指针是否已用完,并在需要时用 realloc 分配更多指针,就可以去掉 idx < NPATHS 这一限制。

其余部分只是处理打开文件并将打开的文件流传递给您的函数。一种基本方法是在命令行上提供文件名,然后用 fopen 打开所提供的文件名(如果没有给出文件名,则默认从 stdin 读取)。与程序中的每个步骤一样,您需要验证返回值并处理任何错误,以避免处理垃圾数据(并引发未定义的行为)。

一个简单的例子是:

/* Read paths from the file named by the first argument (or from stdin when
 * no argument is given), print each one, and release all storage.
 * Returns 0 on success and 1 if the file cannot be opened or read.
 */
int main (int argc, char **argv) {

    char **paths;       /* pointer to pointer to char for paths */
    size_t i, n = 0;    /* counter and n - number of paths read */
    /* open file given by 1st argument (or read stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("fopen-failed");
        return 1;
    }

    paths = readcmdfile (fp, &n);   /* call function to read file */
                                    /* passing open file pointer */
    if (fp != stdin)    /* done reading - close the file we opened */
        fclose (fp);

    if (!paths) {   /* validate return from function */
        fprintf (stderr, "error: readcmdfile failed.\n");
        return 1;
    }

    for (i = 0; i < n; i++) {   /* output lines read from file */
        printf ("path[%zu]: %s\n", i + 1, paths[i]);   /* %zu matches size_t */
        free (paths[i]);        /* free memory holding line */
    }
    free (paths);   /* free pointers */

    return 0;
}

将所有部分组合在一起,添加代码去除 fgets 读入 buf 中的尾随 '\n',并添加额外的测试以确保您读取的行确实能放入 buf 中,您可以执行以下操作:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>     /* for PATH_MAX */

#define NPATHS 3

/* read lines from file, return pointer to pointer to char on success
 * otherwise return NULL. 'n' will contain number of paths read from file.
 */
char **readcmdfile (FILE *fp, size_t *n)
{
    char buf[PATH_MAX] = "";    /* temp buffer to hold line */
    char **paths = NULL;        /* pointer to pointer to char to return */
    size_t idx = 0;             /* index counter (avoids dereferencing) */

    *n = 0;     /* zero the pointer passed as 'n' */

    paths = calloc (NPATHS, sizeof *paths); /* allocate NPATHS pointers */
    if (!paths) {                   /* validate allocation/handle error */
        perror ("calloc-paths");
        return NULL;
    }

    /* read while index < NPATHS & good read into buf
     * (note: instead of limiting to NPATHS - you can simply realloc paths
     *  when idx == NPATHS -- but that is for later)
     */
    while (idx < NPATHS && fgets (buf, sizeof buf, fp)) {

        size_t len = strlen (buf);          /* get length of string in buf */

        if (len && buf[len - 1] == '\n')    /* validate last char is '\n' */
            buf[--len] = 0;                 /* overwrite '\n' with '\0' */
        else if (len == PATH_MAX - 1) {     /* check buffer full - line to long */
            fprintf (stderr, "error: path '%lu' exceeds PATH_MAX.\n", idx);
            return NULL;
        }

        paths[idx] = malloc (len + 1);      /* allocate storage for line */
        if (!paths[idx]) {                  /* validate allocation */
            perror ("malloc-paths[idx]");   /* handle error */
            return NULL;
        }
        strcpy (paths[idx++], buf);     /* copy buffer to paths[idx] */
    }

    *n = idx;   /* update 'n' to contain index - no. of lines read */

    return paths;   /* return paths */
}

int main (int argc, char **argv) {

    char **paths;       /* pointer to pointer to char for paths */
    size_t i, n = 0;    /* counter and n - number of paths read */
    /* open file given by 1st argument (or read stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("fopen-failed");
        return 1;
    }

    paths = readcmdfile (fp, &n);   /* call function to read file */
                                    /* passing open file pointer */
    if (!paths) {   /* validate return from function */
        fprintf (stderr, "error: readcmdfile failed.\n");
        return 1;
    }

    for (i = 0; i < n; i++) {   /* output lines read from file */
        printf ("path[%lu]: %s\n", i + 1, paths[i]);
        free (paths[i]);        /* free memory holding line */
    }
    free (paths);   /* free pointers */

    return 0;
}

$ cat paths.txt
/home/bla/dirname
/home/bla/bla/file1.txt
/home/bla/bla/file2.txt

(注意:如果你分配了内存,就要由你来保留指向每个内存块起始位置的指针,这样当不再需要它时才能将其释放。)

示例输入文件

$ ./bin/readpaths <paths.txt
path[1]: /home/bla/dirname
path[2]: /home/bla/bla/file1.txt
path[3]: /home/bla/bla/file2.txt

示例使用/输出

main()

正如您所看到的,函数只是读取输入文件的每一行:先分配 3 个指针,再为每一行分配存储并将每个内存块的地址赋给相应的指针,然后将指向该集合的指针返回到 main(),在那里它被赋值给 paths。请仔细查看,如果您有其他问题,请告诉我。

答案 1 :(得分:0)

如果我是你,我将为if / else块创建一个方法。我觉得他们多余了。

 /* Sketch only: the dotted lines are placeholders left by the author,
  * presumably standing for the remaining cases. */
 switch(updateParam) {
     case UPDATE1:
            method(); /* the if/else logic goes here, inside the helper */
            break;
     ...............
     ...............
 }

但是,如果您在别处用不到这个方法,并且担心性能问题,您仍然可以把这些代码留在原处,因为函数调用的开销比直接内联的指令更大。

答案 2 :(得分:0)

我建议调查regular expressions。这样你就可以阅读所有内容,然后与正则表达式匹配并处理你的匹配。

为此目的存在正则表达式:使解析变得优雅。

答案 3 :(得分:0)

在您的程序中,您传递了 3 个 char 数组来存储从文件中读取的 3 行。但这样做很不灵活,因为输入文件可能包含更多行,而且将来您可能需要从文件中读取超过 3 行。相反,您可以传递一个 char 指针数组,为其中的指针分配内存,并把从文件中读取的行的内容复制进去。正如 Jonathan(在评论中)所指出的那样,如果您使用标准 I/O,那么可以使用 fgets() 之类的函数从输入文件中读取行。
每从文件中读取一行,就为对应的指针分配内存,并把读到的这一行复制进去。如果行太长,您可以在后续的 fgets() 调用中读取剩余部分,并使用 realloc 扩展该指针所指向的现有内存,使其大到足以容纳该行读取到的剩余部分。

把这些放在一起,你可以做到:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SZ    100
#define MAX_LINES 3  /* Maximum number of lines to be read from file */

/*
 * Read up to MAX_LINES lines from the file named cmdFile.
 *
 * Each line is stored, without its trailing newline, in a freshly allocated
 * string whose address is written to paths[0], paths[1], ... in file order.
 * Lines longer than BUF_SZ - 1 characters are assembled from several
 * fgets() calls. A final line that is not newline-terminated is counted
 * like any other line.
 *
 * Returns the number of lines stored; the caller owns paths[0..count-1] and
 * must free() each entry. Returns -1 if the file cannot be opened or an
 * allocation fails; in that case every string allocated by this call has
 * already been freed and its slot reset to NULL.
 */
int readCMDFile(const char* cmdFile, char *paths[MAX_LINES]) {
    char tmpbuf[BUF_SZ];
    int line_cnt = 0;   /* number of complete lines stored so far */
    int in_line = 0;    /* nonzero while paths[line_cnt] holds a partial line */
    size_t used = 0;    /* characters already stored in the partial line */
    FILE *fp;

    fp = fopen(cmdFile, "r");
    if (fp == NULL) {
        perror ("Failed to open file");
        return -1;
    }

    while ((line_cnt < MAX_LINES) && (fgets (tmpbuf, sizeof tmpbuf, fp) != NULL)) {
        size_t len = strlen (tmpbuf);
        int new_line_found = 0;
        char *ptr;

        /* len can be 0 if the input contains a NUL byte, so test it first */
        if (len > 0 && tmpbuf[len - 1] == '\n') {
            tmpbuf[--len] = '\0';
            new_line_found = 1;
        }

        /* realloc(NULL, n) acts like malloc(n), so a single call covers
         * both starting a new line and extending a partial one. On failure
         * the previous block stays valid and is released in the fail path. */
        ptr = realloc (in_line ? paths[line_cnt] : NULL, used + len + 1);
        if (ptr == NULL) {
            perror (in_line ? "Failed to reallocate memory"
                            : "Failed to allocate memory");
            goto fail;
        }

        /* append this chunk, including its terminating '\0' */
        memcpy (ptr + used, tmpbuf, len + 1);
        paths[line_cnt] = ptr;
        used += len;
        in_line = 1;

        if (new_line_found) {
            line_cnt++;
            in_line = 0;
            used = 0;
        }
    }

    /* The last line of the file may lack a trailing newline; it has been
     * stored in paths[line_cnt] and must still be counted. */
    if (in_line) {
        line_cnt++;
    }

    fclose(fp);
    return line_cnt;

fail:
    /* Release everything allocated by this call, including a partial line. */
    if (in_line) {
        line_cnt++;
    }
    while (line_cnt > 0) {
        line_cnt--;
        free (paths[line_cnt]);
        paths[line_cnt] = NULL;
    }
    fclose(fp);
    return -1;
}

/* Read the lines of "cmdfile.txt" with readCMDFile(), print them with
 * their zero-based index, then free them.
 * Returns 0 on success and 1 if the file could not be read.
 */
int main(void) {
    int lines_read, index;
    const char *file_name = "cmdfile.txt";
    char *paths[MAX_LINES] = {NULL};

    lines_read = readCMDFile(file_name, paths);
    if (lines_read < 0) {
        /* report on stderr and signal the failure through the exit status */
        fprintf (stderr, "Failed to read file %s\n", file_name);
        return 1;
    }

    /* Check the output */
    for (index = 0; index < lines_read; index++) {
        printf ("Line %d: %s\n", index, paths[index]);
    }

    /* Free the allocated memory */
    for (index = 0; index < lines_read; index++) {
        free (paths[index]);
        paths[index] = NULL;
    }

    return 0;
}

输出:

$ cat cmdfile.txt
/home/bla/dirname
/home/bla/bla/file1.txt
/home/bla/bla/file2.txt

$ ./a.out
Line 0: /home/bla/dirname
Line 1: /home/bla/bla/file1.txt
Line 2: /home/bla/bla/file2.txt

请注意,上述程序没有处理文件中的空行,因为问题中没有提到。但是如果你愿意,可以在从文件中读取的行中删除尾随换行符后立即添加该检查。