使用strtok,适用于一个文件而不适用于另一个文件

时间:2013-04-12 10:00:45

标签: c

这是我的代码:

      char delims[4];
      delims[0]='\t';
      delims[1]=' ';
      delims[2]=',';
      delims[3]='\0';

      i = 0;
      while (fgets(line, 10000, fp) != NULL) 
      { 

        result = strtok(line,delims);

        while(result != NULL) {
          (*data_array)[i++] = atof(result);
          result = strtok(NULL, delims);
        }
      }

直截了当。它适用于以下文件:

      3.600000 79.000000
      1.800000 54.000000
      3.333000 74.000000
      2.283000 62.000000
      4.533000 85.000000
      2.883000 55.000000
      4.700000 88.000000

但它不适用于此文件:

      3.6   79  3
      1.8   54  3
      3.333 74  3
      2.283 62  1
      4.533 85  1
      2.883 55  1
      4.7   88  2
      3.6   85  1
      1.95  51  1
      4.35  85  3

我收到了“Aborted(core dumped)”错误。我做错了什么?

编辑:这是整个功能。 * data_array和* data_labels刚刚在main()中声明。我注释掉了一些data_labels部分,所以我可以让第一部分工作:

    int getdata(double* *data_array, int* *data_labels, int argc, char *argv[], int *items, int *attr)
    {  
      // filename variables
      char *filename;         // pointer to a string that will contain the name of the training data file.
      char *result = NULL;    // used with strtok() to extract each feature value given a line of delimited features.
      FILE *fp;               // pointer to FILE, we can use this with fgets to access each line
      char line[10000];       // array of 1000 chars for storing the raw data for one observation
      char delims[4];         // an array of common delimiters for data files
      delims[0]='\t';
      delims[1]=' ';
      delims[2]=',';
      delims[3]='\0';

      int i, j;

      // check that we have the correct number of command line arguments
      if (argc < 2)
      {
        printf("2usage: progname filename\n");
        return -1;
      }

      if (argc < 4)
      {
        printf("3usage: progname filename num_labels k(nn)\n");
        return -1;
      }

      if (atoi(argv[2]) < 1)
      {
        printf("num_labels must be a positive integer.\n");
        return -1;
      }

      if (atof(argv[2]) - atoi(argv[2]) > 0)
      {
        printf("num_labels must be an integer.\n");
        return -1;
      }

       if (atoi(argv[3]) < 1)
      {
        printf("k must be a positive integer.\n");
        return -1;
      }

      if (atof(argv[3]) - atoi(argv[3]) > 0)
      {
        printf("k must be an integer.\n");
        return -1;
      }

      // try to open the file
      filename = argv[1];
      fp = fopen(filename, "r");
      if (fp == NULL)
      {
        printf("could not open file: %s\n", filename);
        printf("note: the filename should be the second command line argument, after the .exe file");
        return -1;
      }

      printf("reading file: %s\n", filename);

      // get first line of the file to get num_items and num_attrs.
      fgets(line, 1000, fp);
      sscanf(line, "%d \t %d", items, attr);
      printf("num items: %d\n", *items);
      printf("num attributes: %d\n", *attr);  

      if (atoi(argv[3]) > *items)
      {
        printf("k should be smaller than the number of items in the input file.\n");
        return -1;
      }

      // create an array of the data
      *data_array = malloc(*items* *attr*sizeof(double));
      *data_labels = malloc(*items*sizeof(int));
      printf("data array size = %d\n\n",*items* *attr);

      i=0;
      j=0;

      while (fgets(line, 10000, fp) != NULL) 
      { 

        // we break line into tokens using our delimeters list declared at the beginning of the function
        result = strtok(line,delims);
        //printf("%d\n",i);

        while(result != NULL) {
          (*data_array)[i++] = atof(result);
          //printf("%f\n",(*data_array)[i-1]);
          result = strtok(NULL, delims);
        }
        //(*data_labels)[j++] = (int)((*data_array)[--i]);
      }
      /*
      printf("j=%d,items=%d\n",j,*items);
      for (i=0;i<*items;i++)
      {
        printf("i=%d,items=%d,",i,*items);
        printf("label=%d\n",(*data_labels)[i]);
      }
      // close the file
      fclose(fp);

      return 0;
    }

2 个答案:

答案 0 :(得分:1)

您对strtok的致电是好的 - 我会用等效的<{1}}取代delims的初始化

char *delims = "\t ,";

并切换到可重新进入的strtok_r,但这不是导致崩溃的原因(除非您处于并发环境中,在这种情况下,崩溃应该是随机的,并且大多数与被解析的文件无关)。

这条线非常可疑:

(*data_array)[i++] = atof(result);

您要么没有为data_array指针所指向的数组分配足够的空间,要么data_array本身无效。

答案 1 :(得分:1)

元素(*data_array)可以容纳的数量少于您尝试填充循环的令牌数量。