将CSV解析为动态分配的结构数组(ANSI 89)

时间:2019-06-27 02:53:24

标签: c arrays csv segmentation-fault dynamic-memory-allocation

我正在尝试将csv解析为动态分配的结构数组,但是我的尝试因分段错误而崩溃。

这是我的数据结构:

SO02773202,5087001,0
SO02773203,5087001,0
SO02773204,5087001,0
SO02773205,5087001,0
SO02773206,5087001,14

这是我要将数据解析为的结构:

/* One record of the sales-lines CSV: two text fields and an integer. */
struct saleslines{
  char *salesid;        /* text of the first CSV field (the sales id) */
  char *smmcampaignid;  /* text of the second CSV field (the campaign id) */
  int numberofbottles;  /* third CSV field, stored as an integer */
};

/* Short alias used throughout the program. */
typedef struct saleslines saleslines_t;

这是我解析文件的尝试:

/*
 * Read FILENAME in two passes: first count the '\n' characters, then
 * allocate an array of records and fill it by splitting every line on ','.
 * Each parsed record is echoed to stdout.
 *
 * saleslines       pointer that receives the malloc'd array (local copy only,
 *                  see the first note below)
 * number_of_lines  starting value for the line counter; the newlines found
 *                  in the file are added on top of it
 *
 * Returns the updated number_of_lines, or 0 when the first fopen fails.
 *
 * NOTE(review): saleslines is passed by value, so the address returned by
 * malloc is stored only in this function's copy of the pointer. The caller's
 * pointer is left unchanged and the allocated array is unreachable after
 * return.
 */
int read_saleslines(saleslines_t* saleslines, int number_of_lines){
  /* NOTE(review): getc returns int; holding it in a char makes the EOF test unreliable */
  char c;

  FILE* fp; 
  fp = fopen(FILENAME, "r");             /* Open the saleslines file */

  if(fp == NULL){                              /* Give up if the file cannot be opened */
  printf("Error - file not found\n");
    return 0;
  }

  /* First pass: count newline characters */
  c = getc(fp);
  while (c != EOF){
    if (c == '\n'){
    number_of_lines += 1;
    }
    c = getc(fp);
  }

  printf("Number of lines is %d\n", number_of_lines);

  /* Room for twice as many records as counted lines; the result is not checked for NULL */
  saleslines = (saleslines_t*) malloc((number_of_lines * 2) * sizeof(saleslines_t));

  /* allocation of the buffer for every line in the File */
  char *buf = (char*) malloc(1000);
  char *tmp; 

  /* NOTE(review): the first stream is overwritten here without fclose, and a
     failed re-open only prints a message before fgets is called with NULL */
  if ( ( fp = fopen(FILENAME, "r" ) ) == NULL )
  {
    printf( "File could not be opened.\n" );
  }
  int i = 0;
  /* Second pass: at most 254 characters are read per call although buf holds 1000 */
  while (fgets(buf, 255, fp) != NULL){
    /* Strip the trailing newline kept by fgets */
    if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
      buf[strlen (buf) - 1] = '\0';       

    /* NOTE(review): strtok returns NULL when a field is missing; none of the
       three results below is checked before being used */
    tmp = strtok(buf, ",");
    saleslines[i].salesid = strdup(tmp);

    tmp = strtok(NULL, ",");
    saleslines[i].smmcampaignid = strdup(tmp);

    tmp = strtok(NULL, ",");
    saleslines[i].numberofbottles = atoi(tmp);

    printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", saleslines[i].salesid , saleslines[i].smmcampaignid, saleslines[i].numberofbottles);

    i++;
  }
  free(buf);
  fclose(fp);
  printf("Number of lines is %i\n", number_of_lines);
  return number_of_lines;
}

由于某种原因,它会解析文件并打印结果结构数组,但是当我在此之后立即调用此函数时,它会因段错误而崩溃:

/*
 * Print the line count followed by every record of the array to stdout.
 *
 * saleslines       array holding at least number_of_lines records
 * number_of_lines  number of records to print
 */
void print_saleslines_struct(saleslines_t* saleslines, int number_of_lines){
  int i;
  printf("Number of lines is %i", number_of_lines);
  for(i = 0; i < number_of_lines; i++){
    printf("Salesid:\t %s\n", saleslines[i].salesid);
    printf("Campaign:\t %s\n", saleslines[i].smmcampaignid);
    printf("# of Bottles:\t %d\n", saleslines[i].numberofbottles);
  }
}

我似乎找不到此内存错误所在。

这是初始化和主要内容:

/* File-scope state used by the menu in main(). */
saleslines_t* saleslines;                    /* record array handed to the read/print/summarise calls */
saleslines_summary_t* saleslines_summary;    /* passed to summarise_saleslines, freed on exit */
saleslines_grouped_t* saleslines_grouped;    /* passed to summarise_saleslines, freed on exit */
int number_of_lines = 0;                     /* record count returned by read_saleslines */
int* number_of_linesp = &number_of_lines;    /* address of the count, for summarise_saleslines */

/* Main */

/*
 * Interactive menu: keeps prompting until option 4 (or end of input) is
 * reached, then releases the three global arrays and returns 0.
 */
int main(){

  int chosen_option;
  int fields_read;
  int ch;

  while(1){

    printf("What would you like to do?\n");
    printf("1. Read saleslines.txt\n");
    printf("2. Print saleslines\n");
    printf("3. Summarise saleslines\n");
    printf("4. Exit the program\n");

    fields_read = scanf("%d", &chosen_option);

    if(fields_read == EOF){
      /* No more input will ever arrive: leave through the normal exit option */
      chosen_option = 4;
    }
    else if(fields_read != 1){
      /* Not a number: drop the rest of the line, otherwise the same bytes
         would be rejected again on every pass and the loop would never stop */
      while((ch = getchar()) != '\n' && ch != EOF){
      }
      continue;
    }

    switch(chosen_option){

      /* NOTE(review): saleslines is passed by value here, so read_saleslines
         cannot change this global pointer */
      case 1 : number_of_lines = read_saleslines(saleslines, number_of_lines); break;

      case 2 : printf("Number of lines is %i", number_of_lines);  print_saleslines_struct(saleslines, number_of_lines); break;

      case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;

      case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;

      /* Any other number: show the menu again */
      default : break;

    }

  }

  return 0;

}

更新

问题似乎出在我对结构数组的初始化上。

当我这样初始化它时:saleslines_t* saleslines; 然后像这样的malloc:saleslines = malloc(number_of_lines + 1 * sizeof(saleslines_t);

我遇到了段错误。

但是,如果我这样初始化:saleslines[600];(分配的文件数多于文件中的行数),则一切正常。

我该如何解决?我希望能够动态分配struct数组中的条目数。

编辑2

以下是建议的更改:

/* Revised prototype: takes the address of the caller's array pointer, so the
   function can store the allocated array through it. */
int read_saleslines(saleslines_t** saleslines, int number_of_lines);

saleslines_t* saleslines;   /* record array, filled through &saleslines */
int number_of_lines = 0;    /* record count returned by read_saleslines */

/*
 * Interactive menu: keeps prompting until option 4 (or end of input) is
 * reached, then releases the global arrays and returns 0.
 */
int main(){

  int chosen_option;
  int fields_read;
  int ch;

  while(1){

    printf("What would you like to do?\n");
    printf("1. Read saleslines.txt\n");
    printf("2. Print saleslines\n");
    printf("3. Summarise saleslines\n");
    printf("4. Exit the program\n");

    printf("Number of saleslines = %i\n", number_of_lines);

    fields_read = scanf("%d", &chosen_option);

    if(fields_read == EOF){
      /* No more input will ever arrive: leave through the normal exit option */
      chosen_option = 4;
    }
    else if(fields_read != 1){
      /* Not a number: drop the rest of the line, otherwise the same bytes
         would be rejected again on every pass and the loop would never stop */
      while((ch = getchar()) != '\n' && ch != EOF){
      }
      continue;
    }

    switch(chosen_option){

      /* The address of the global pointer is passed so the reader can store
         the array it allocates */
      case 1 : number_of_lines = read_saleslines(&saleslines, number_of_lines); break;

      case 2 : printf("Number of lines is %i", number_of_lines);  print_saleslines_struct(saleslines, number_of_lines); break;

      case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;

      case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;

      /* Any other number: show the menu again */
      default : break;

    }

  }

  return 0;

}

/* Return a malloc'd copy of text, or NULL when the allocation fails.
   Plain C89 replacement for the POSIX-only strdup. */
static char *copy_field(const char *text)
{
  size_t size = strlen(text) + 1;
  char *copy = (char*) malloc(size);

  if(copy != NULL){
    memcpy(copy, text, size);
  }
  return copy;
}

/*
 * Load FILENAME into a newly allocated array of records.
 *
 * The file is scanned once to count its lines, the array is allocated, and
 * the file is scanned a second time to split every line on ',' into
 * salesid, smmcampaignid and numberofbottles. Each stored record is echoed
 * to stdout. Lines that do not contain three fields are skipped.
 *
 * saleslines       address of the caller's array pointer; on success it
 *                  receives the new array. An array already stored there is
 *                  not released by this function.
 * number_of_lines  kept for interface compatibility; counting always starts
 *                  from zero so a repeated call cannot inflate the result
 *
 * Returns the number of records stored, or 0 when the file cannot be opened
 * or the array cannot be allocated (*saleslines is then left untouched).
 */
int read_saleslines(saleslines_t** saleslines, int number_of_lines)
{
  char buf[256];
  char *id;
  char *campaign;
  char *bottles;
  saleslines_t *records;
  FILE *fp;
  int c;                /* int, not char, so EOF is distinguishable from data */
  int capacity = 1;     /* newline count + 1: the last line may lack a '\n' */
  int i = 0;

  (void) number_of_lines;

  fp = fopen(FILENAME, "r");             /* Open the saleslines file */
  if(fp == NULL){
    printf("Error - file not found\n");
    return 0;
  }

  /* First pass: count the lines */
  while((c = getc(fp)) != EOF){
    if(c == '\n'){
      capacity += 1;
    }
  }

  printf("Number of lines is %d\n", capacity - 1);

  records = (saleslines_t*) malloc((size_t) capacity * sizeof *records);
  if(records == NULL){
    printf("Error - out of memory\n");
    fclose(fp);
    return 0;
  }

  /* Second pass over the same stream instead of opening the file twice */
  rewind(fp);

  /* The capacity test protects the array when an over-long line is
     delivered by fgets in several pieces */
  while(i < capacity && fgets(buf, sizeof buf, fp) != NULL){
    size_t len = strlen(buf);

    if(len > 0 && buf[len - 1] == '\n'){
      buf[len - 1] = '\0';
    }

    id = strtok(buf, ",");
    campaign = strtok(NULL, ",");
    bottles = strtok(NULL, ",");
    if(id == NULL || campaign == NULL || bottles == NULL){
      continue;                          /* blank or malformed line */
    }

    records[i].salesid = copy_field(id);
    records[i].smmcampaignid = copy_field(campaign);
    if(records[i].salesid == NULL || records[i].smmcampaignid == NULL){
      /* Out of memory: drop the half-built record, keep what was read */
      free(records[i].salesid);
      free(records[i].smmcampaignid);
      break;
    }
    records[i].numberofbottles = atoi(bottles);

    /* records[i] is the i-th element of the array; indexing the
       pointer-to-pointer parameter itself would read past the caller's
       single pointer */
    printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", records[i].salesid, records[i].smmcampaignid, records[i].numberofbottles);

    i++;
  }
  fclose(fp);

  *saleslines = records;
  printf("Number of lines is %i\n", i);
  return i;
}

现在,程序在读取struct数组中的第一个元素后会发生段错误。

2 个答案:

答案 0 :(得分:1)

您对read_saleslines()的参数有疑问。第一个参数应该是指向您的结构数组的指针,即双指针。

int read_saleslines(saleslines_t* saleslines, int number_of_lines){

您要修改saleslines指向的位置。 saleslines是函数的局部变量,作用域是该函数。退出read_saleslines()后,变量将被“破坏”,这意味着它包含的值不再可访问。添加间接指示的另一个级别,即指针,您可以修改在函数的外部中定义的变量(丑陋的全局变量或其他)。因此,更改该参数,以便函数原型匹配

int read_saleslines(saleslines_t** saleslines, int *);

并在功能内 更改访问它的位置(例如,添加*来访问该功能,

saleslines = (saleslines_t*) malloc((number_of_lines * ...

*saleslines = (saleslines_t*) malloc((number_of_lines * ...

saleslines[i].salesid = strdup(tmp);

(*saleslines)[i].salesid = strdup(tmp);

然后添加一个&,您可以在函数外部使用变量:

number_of_lines = read_saleslines(saleslines, number_of_lines);

更改为

some_var = read_saleslines(&saleslines, &number_of_lines);

这将使您的代码工作正常。

答案 1 :(得分:1)

您的代码以及整体思路中都有不少错误。无需为了确定行数而先遍历一次文件、分配内存之后再重新读取文件来解析数据。此外,也无需对每一行进行标记化(strtok)来分隔逗号分隔值:在用fgets读取每一行之后,用sscanf()解析两个字符串和一个int就足够了。

尽管您可以随意传递任何您喜欢的参数组合并返回您想要的任何东西,但是由于您要分配一个struct数组并将值读入该数组,因此让函数返回指向已分配数组的指针(失败时返回NULL),并通过一个以指针形式传入的参数把读取到的总行数带回给调用方,是比较合理的做法。

此外,通常您希望在调用程序中打开并验证文件,并传递一个FILE*参数,将打开的文件流传递给您的函数。考虑到这一点,您可以将功能重构为:

/* read saleslines into array of saleslines_t, allocating for
 * salesid, and smmcampaignid within each struct. Return pointer
 * to allocated array on success with lines updated to hold the
 * number of elements, or NULL otherwise.
 */
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{

在您的函数中,您只需要一个缓冲区来保存每行读取的内容,一个用于跟踪数组中分配的元素数量的计数器以及一个指向要返回的数组的指针。例如,您可以执行以下操作来处理所有三个:

    char buf[MAXC];                 /* buffer to hold line */
    size_t maxlines = MINL;         /* maxlines allocated */
    saleslines_t *sales = NULL;     /* pointer to array of struct */

(注意:因为您通过作为参数传入的指针lines来跟踪已读取的行数,所以在函数开头将该地址处的值初始化为零是很有意义的。)

现在,函数的工作开始了:您需要将每一行读入buf,并从每一行中解析出所需的信息。由于salesid和smmcampaignid都是结构体中指向字符的指针,因此您需要为从行中解析出的每个字符串分配一个内存块,将该字符串复制到新的内存块中,然后把该块的起始地址赋给对应的指针。要“动态”处理为结构体数组分配的元素,只需检查已填充的行数(*lines)是否等于已分配的数量(maxlines),或者*lines是否为零(表示需要进行初始分配);在这两种情况下,都用realloc为您的结构体数组重新分配(或首次分配)内存。

调用realloc时,始终使用临时指针来接收返回值;这样即使realloc失败并返回NULL,也不会用NULL覆盖指向当前已分配块的指针,从而避免内存泄漏。

将所有内容放到函数的开始看似令人生畏,但这实际上很简单,例如

    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        char id[MAXC], cid[MAXC];   /* temp arrays to hold strings */
        int bottles;                /* temp int for numberofbottles */
        if (*lines == maxlines || !*lines) {    /* check if realloc req'd */
            /* always realloc with a temp pointer */
            void *tmp = realloc (sales, 2 * maxlines * sizeof *sales);
            if (!tmp) { /* if realloc fails, original pointer still valid */
                perror ("realloc-sales");   /* throw error */
                return sales;               /* return current pointer      */ 
            }                               /* (don't exit or return NULL) */
            sales = tmp;    /* assign reallocated block to sales */
            /* (optional) zero newly allocated memory */
            memset (sales + *lines, 0, maxlines * sizeof *sales);
            maxlines *= 2;  /* update maxlines allocated */
        }

现在,您可以使用sscanf解析行中的所需信息;成功解析之后,为salesid和smmcampaignid这两个指针各自分配内存,将解析出的字符串复制到新的内存块中,并把各块的起始地址赋给相应的指针,例如:

        /* parse needed data from line (sscanf is fine here) */
        if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
            size_t  idlen  = strlen (id),   /* get lengths of strings */
                    cidlen = strlen (cid);
            sales[*lines].salesid = malloc (idlen + 1); /* allocate string */
            if (!sales[*lines].salesid) {               /* validate! */
                perror ("malloc-sales[*lines].salesid");
                break;
            }
            sales[*lines].smmcampaignid = malloc (cidlen + 1);  /* ditto */
            if (!sales[*lines].smmcampaignid) {
                perror ("malloc-sales[*lines].smmcampaignid");
                break;
            }
            memcpy (sales[*lines].salesid, id, idlen + 1);  /* copy strings */
            memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
            sales[(*lines)++].numberofbottles = bottles;    /* assign int */
        }   /* (note lines counter updated in last assignment) */

(注意:您可以使用strdup一次性完成获取每个已解析字符串的长度、分配足够的内存来容纳该字符串并将其赋给指针,例如sales[*lines].salesid = strdup (id);,但是……C99及更高版本的标准并不要求提供strdup,因此获取长度、分配length + 1字节、然后手动memcpy您的字符串同样简单,并且能确保可移植性。此外,由于strdup会分配内存,因此您必须验证它返回的指针——99%使用它的人都忽略了这一点。)

就是这样,当fgets()失败时,您已经到达EOF,现在只需:

    return sales;   /* return dynamically allocated array of struct */
}

将其完全放在一个简短的工作示例中,该示例将要读取的文件名作为程序的第一个参数(如果没有给出参数,则默认从stdin读取),您可以这样做:

使用/输出示例

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXC 1024   /* if you need a constant, #define one (or more) */
#define MINL    2

typedef struct saleslines{
    char *salesid;
    char *smmcampaignid;
    int numberofbottles;
} saleslines_t;

/* read saleslines into array of saleslines_t, allocating for
 * salesid, and smmcampaignid within each struct. Return pointer
 * to allocated array with *lines updated to hold the number of
 * elements stored; NULL is returned when no element was stored.
 *
 * Lines that do not hold "text,text,int" are skipped. If an
 * allocation fails part-way, the elements read so far are returned
 * and *lines counts only those, so the caller can still use and
 * free them.
 */
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{
    char buf[MAXC];                 /* buffer to hold line */
    size_t allocated = 0;           /* elements currently allocated */
    saleslines_t *sales = NULL;     /* pointer to array of struct */

    *lines = 0;     /* zero lines */

    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        char id[MAXC], cid[MAXC];   /* temp arrays to hold strings */
        char *idcopy, *cidcopy;     /* heap copies stored in the struct */
        size_t idlen, cidlen;       /* lengths of parsed strings */
        int bottles;                /* temp int for numberofbottles */

        /* parse first: a line that does not parse must never cause the
         * array to grow (field widths are MAXC - 1)
         */
        if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) != 3)
            continue;

        if (*lines == allocated) {  /* array full (or not yet allocated) */
            /* first allocation is 2 * MINL elements, then double */
            size_t grown = 2 * (allocated ? allocated : MINL);
            /* always realloc with a temp pointer */
            void *tmp = realloc (sales, grown * sizeof *sales);
            if (!tmp) { /* if realloc fails, original pointer still valid */
                perror ("realloc-sales");   /* throw error */
                return sales;               /* return current pointer      */
            }                               /* (don't exit or return NULL) */
            sales = tmp;    /* assign reallocated block to sales */
            /* zero every newly added element */
            memset (sales + allocated, 0, (grown - allocated) * sizeof *sales);
            allocated = grown;
        }

        idlen  = strlen (id);
        cidlen = strlen (cid);

        idcopy = malloc (idlen + 1);            /* allocate string */
        if (!idcopy) {                          /* validate! */
            perror ("malloc-sales[*lines].salesid");
            break;
        }
        cidcopy = malloc (cidlen + 1);          /* ditto */
        if (!cidcopy) {
            perror ("malloc-sales[*lines].smmcampaignid");
            free (idcopy);  /* element is not counted, so release it here */
            break;
        }
        memcpy (idcopy, id, idlen + 1);         /* copy strings */
        memcpy (cidcopy, cid, cidlen + 1);

        sales[*lines].salesid = idcopy;
        sales[*lines].smmcampaignid = cidcopy;
        sales[*lines].numberofbottles = bottles;
        (*lines)++;         /* element complete, count it */
    }

    return sales;   /* return dynamically allocated array of struct */
}

/* Read sales lines from the file named by the first argument (or from
 * stdin when no argument is given), print every record, and release
 * all memory. Returns 1 when the file cannot be opened, 0 otherwise.
 */
int main (int argc, char **argv) {

    size_t count, idx = 0;
    saleslines_t *records = NULL;
    FILE *in = stdin;               /* default input without arguments */

    if (argc > 1)
        in = fopen (argv[1], "r");

    if (in == NULL) {               /* open failed */
        perror ("file open failed");
        return 1;
    }

    records = read_saleslines (in, &count);

    if (in != stdin)                /* only close what was opened here */
        fclose (in);

    while (idx < count) {           /* print, then free, each record */
        saleslines_t *rec = &records[idx];

        printf ("sales[%2zu]:  %s  %s  %2d\n", idx, rec->salesid,
                rec->smmcampaignid, rec->numberofbottles);
        free (rec->salesid);
        free (rec->smmcampaignid);
        idx++;
    }
    free (records);                 /* finally the array itself */

    return 0;
}

内存使用/错误检查

在您编写的任何动态分配内存的代码中,对于任何分配的内存块,您都有2个职责:(1)始终保留指向该内存块起始地址的指针,以便(2)在不再需要它时可以将其释放。

当务之急是使用一个内存错误检查程序,以确保您不会尝试访问内存或在已分配块的边界之外进行写入,不会读取未初始化的值或基于未初始化的值进行条件跳转,最后,确认您释放了已分配的所有内存。

对于Linux,valgrind是正常选择。每个平台都有类似的内存检查器。它们都很容易使用,只需通过它运行程序即可。

上面示例程序的运行输出:

$ ./bin/saleslines dat/saleslines.txt
sales[ 0]:  SO02773202  5087001   0
sales[ 1]:  SO02773203  5087001   0
sales[ 2]:  SO02773204  5087001   0
sales[ 3]:  SO02773205  5087001   0
sales[ 4]:  SO02773206  5087001  14

通过valgrind运行:

$ valgrind ./bin/saleslines dat/saleslines.txt
==19819== Memcheck, a memory error detector
==19819== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==19819== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==19819== Command: ./bin/saleslines dat/saleslines.txt
==19819==
sales[ 0]:  SO02773202  5087001   0
sales[ 1]:  SO02773203  5087001   0
sales[ 2]:  SO02773204  5087001   0
sales[ 3]:  SO02773205  5087001   0
sales[ 4]:  SO02773206  5087001  14
==19819==
==19819== HEAP SUMMARY:
==19819==     in use at exit: 0 bytes in 0 blocks
==19819==   total heap usage: 13 allocs, 13 frees, 935 bytes allocated
==19819==
==19819== All heap blocks were freed -- no leaks are possible
==19819==
==19819== For counts of detected and suppressed errors, rerun with: -v
==19819== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

始终确认已释放已分配的所有内存,并且没有内存错误。

动态分配任何东西都没有困难。只需将它分成足够小的块,对每个需要分配的指针都一丝不苟地处理好每个细节(即英文所说的“点好所有的i、划好所有的t”)即可。仔细研究一下,如果您还有其他问题,请告诉我。