如何读取文本文件中的逗号分隔行并将其字段插入到struct指针数组中?

时间:2017-01-31 03:55:19

标签: c arrays pointers struct strtok

我琢磨这个问题已经有一段时间了,感觉自己应该已经很接近答案了。基本上,我有一个数据文件,其中包含以换行符分隔的各个国家/地区的记录。每条记录都由逗号分隔的字段组成,我想从中提取某些字段。

例如(作为一行):

60,AFG,阿富汗,亚洲,南亚和中亚,652090,1919,22720000,55.9,5976,Afganistan / Afqanestan,伊斯兰酋长国,Mohammad Omar,1,AF

这些行中的每一行都将构成一个结构体。本质上,我想读取每一行并将其插入到结构体指针数组中(即动态分配)。我也只需要特定的字段。当我对该行进行“标记化”(tokenize)时,我想要代码、名称、人口和预期寿命(life expectancy)这几个字段。分别为:

AFG,阿富汗,22720000,55.9。

我的想法是使用fgets()读取文件中的每一行,在循环中用malloc()为指针分配内存,用strtok()按我需要的字段进行标记化,然后把字段存入结构体。但是,我一定是哪里做错了,因为各种测试都没有在输出中显示任何内容。

到目前为止,这是我的工作。我将不胜感激任何帮助。

#include "allheaders.h" // contains all common headers for personal use

/* Capacity, in bytes, of the buffer each line is read into. */
#define BUF_SIZE 512
/* Number of Country slots reserved before reading starts. */
#define NUM_RECS 238

/* The subset of one data-file record that the program keeps. */
struct Country {
    char  code[4];         /* country code, e.g. "AFG", plus the terminating NUL */
    char  name[40];        /* country name, NUL-terminated */
    int   population;      /* population field of the record */
    float lifeExpectancy;  /* life-expectancy field of the record */
};

typedef struct Country Country;

/* Pointer-to-record alias used for the elements of the record array. */
typedef struct Country *countryPtr;

/*
 * Reads "AllCountries.dat" line by line, splits every line on commas and
 * stores the code, name, population and life-expectancy fields in a
 * heap-allocated array of NUM_RECS Country pointers, then prints the code
 * of the second record as a smoke test.
 *
 * Review notes on the code as written:
 *  - none of the malloc() results is checked;
 *  - no strtok() result is checked, so a line with fewer fields than
 *    expected passes NULL to strcpy()/atoi()/atof();
 *  - index is never compared with NUM_RECS;
 *  - only the pointer array is freed, and the file is never closed;
 *  - the function falls off the end without an explicit return.
 */
int main( int argc, const char* argv[] ) {

/* Opening the file */
FILE *filePtr;  // pointer to file
if ((filePtr = fopen("AllCountries.dat", "r")) == NULL) {   // if couldn't open file
    printf("Error opening file\n"); // error message
    exit(1);
}

/* Reading the file */
char buffer[BUF_SIZE];  // buffer to read
int index = 0;          // slot that the next record is written to
char *token;            // current field returned by strtok()
// One pointer per expected record; the records themselves are allocated below.
countryPtr *myCountries = malloc(sizeof(*myCountries) * NUM_RECS);
for(int i = 0; i < NUM_RECS; ++i) {
    // malloc() does not zero memory, so unused records hold indeterminate data.
    myCountries[i] = malloc(sizeof(*myCountries[i]));
}

// NOTE(review): a file with more than NUM_RECS lines writes past the array.
while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {

    // strtok() overwrites each delimiter in buffer with '\0' and treats a run
    // of commas as one delimiter, so an empty field shifts every later one.
    token = strtok(buffer,",");     // field 0: discarded
    token = strtok(NULL, ",");      // field 1: country code
    strcpy(myCountries[index]->code, token);    // unbounded copy into the code field
    token = strtok(NULL, ",");      // field 2: country name
    strcpy(myCountries[index]->name, token);    // unbounded copy into the name field
    token = strtok(NULL, ",");      // field 3: skipped
    token = strtok(NULL, ",");      // field 4: skipped
    token = strtok(NULL, ",");      // field 5: skipped
    token = strtok(NULL, ",");      // field 6: skipped
    token = strtok(NULL, ",");      // field 7: population
    myCountries[index]->population = atoi(token);
    token = strtok(NULL, ",");      // field 8: life expectancy
    myCountries[index]->lifeExpectancy = atof(token);
    //printf("%s", buffer);
    index++;
}

// Prints the code of the second record; no newline follows it.
printf("%s", myCountries[1]->code); // test?
// Frees only the pointer array; the NUM_RECS records it points to are not freed.
free(myCountries);

}

2 个答案:

答案 0 :(得分:1)

请看下面的代码。首先,标记为 NYI(not yet implemented,尚未实现)的地方还需要您自己进一步完善。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

/* Capacity, in bytes, of the buffer each line is read into. */
#define BUF_SIZE 512
/* Upper bound on the number of records the program holds. */
#define NUM_RECS 238

/* The subset of one data-file record that the program keeps. */
struct Country {
  char  code[4];         // NYI - magic number (country code plus terminating NUL)
  char  name[41];        // NYI - magic number (country name plus terminating NUL)
  int   population;      // NYI - what should be stored when the conversion fails?
  float lifeExpectancy;  // NYI - what should be stored when the conversion fails?
};

typedef struct Country Country;

/* Pointer-to-record alias used for the elements of the record array. */
typedef struct Country *countryPtr;

int main( int argc, const char* argv[] ) {
  /* Opening the file */
  FILE *filePtr;  // pointer to file
  if ((filePtr = fopen("a.txt", "r")) == NULL) {   // if couldn't open file
    printf("Error opening file\n"); // error message
    exit(1);
  }

  /* Reading the file */
  char buffer[BUF_SIZE];  // buffer to read
  int index=0;
  char *token; // NYI - initial value
  countryPtr* myCountries = calloc(NUM_RECS, sizeof(countryPtr));
  for(int i = 0; i < NUM_RECS; ++i) {
    myCountries[i] = calloc(1, sizeof(Country));
  }

  while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
    // NYI - magic lengths / overflow strcpy targets

    token = strtok(buffer,","); // NYI - This is probably not the best way to do this. At least fold into a loop.
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->code, token);
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->name, token);
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");

    myCountries[index]->population = atoi(token); // NYI - atoi failure
    token = strtok(NULL, ",");
    myCountries[index]->lifeExpectancy = atof(token); // NYI - atof failure
    printf("%s", buffer);
    index++;
  }

  printf("%s\n", myCountries[0]->code); // test? NYI - need more proof
  free(myCountries); // NYI - this is a sequence - need to free each of the new elements 
}

答案 1 :(得分:0)

我根据您的代码和数据文件,采用了另一种方法来解决这个问题。我已经测试过,它适用于包含您所示记录类型的文件。希望它能解释清楚一些事情,让您的工作更轻松,并为您提供一个不错的起点。

一般来说,除极少数情况外,我不喜欢让程序必须预先统计(这很耗时)或预先知道文件中的记录数量。因此,在读取文件时,我更喜欢按需分配内存。当然,如果文件很大、数据很多,您就必须设计更好的内存管理方案,而不是把所有数据都留在内存中。到了一定规模,最好改用某种现成的数据库方案:MySQL、API、库、解析器等等……但对于小文件,这种做法应该够用了。

通常在UNIX上的C程序中,exit(0)表示成功,exit(-1)表示失败(任何非零退出状态都表示失败)。此外,由于您的国家/地区代码为3个字符,保存它的字段必须至少有4个字符,才能容纳结尾的 '\0'。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

/* Buffer and table dimensions. */
#define MAXRECL   512   /* bytes in the line buffer */
#define MAXFIELDS 100   /* rows in the per-line field table */
#define MAXFIELDL 80    /* bytes in each row of the field table */

/* 0-based positions of the fields that are kept from each record. */
#define COUNTRY_CODE    1
#define COUNTRY_NAME    2
#define POPULATION      7
#define LIFE_EXPECTANCY 8

/* Longest code and name, not counting the terminating '\0'. */
#define CCMAX           3
#define CNMAX           40

typedef struct Country country_t;

/* One country record; records are chained through next. */
struct Country {
    country_t *next;             /* following record, or NULL for the last one */
    char       code[CCMAX + 1];  /* country code plus trailing '\0' */
    char       name[CNMAX + 1];  /* country name plus trailing '\0' */
    int        population;
    float      lifeExpectancy;
};

/* Head of the record list; NULL while the list is empty. */
country_t *countryRecords;

/*
 * Reads "AllCountries.dat", builds a singly linked list of country
 * records headed by countryRecords, prints the record count and every
 * record, then frees the list.
 *
 * Each line is split on commas into a bounded table of field copies. A
 * line is skipped, with a message on stderr, when it has too few fields
 * or when its population or life-expectancy field does not start with a
 * number. Exits with status -1 if the file cannot be opened or memory
 * runs out, and with status 0 otherwise.
 *
 * Known limits: strtok() treats a run of commas as one delimiter, so an
 * empty field shifts every later position; over-long fields are
 * truncated; numeric range is not validated.
 */
int main( int argc, const char* argv[] ) {
    (void)argc;
    (void)argv;

    FILE *fp;
    if ((fp = fopen("AllCountries.dat", "r")) == NULL) {
        printf("Error opening file\n");
        exit(-1);
    }

    int totalCountries = 0;  /* records accepted into the list */
    int lineNo = 0;          /* 1-based line number, for diagnostics only */
    char buf[MAXRECL];
    char fields[MAXFIELDS][MAXFIELDL];
    country_t *tail = NULL;  /* last node appended so far */

    while (fgets(buf, sizeof buf, fp) != NULL) {
        ++lineNo;

        /* Copy at most MAXFIELDS fields; snprintf always NUL-terminates. */
        int nfields = 0;
        for (char *field = strtok(buf, ",");
             field != NULL && nfields < MAXFIELDS;
             field = strtok(NULL, ",")) {
            snprintf(fields[nfields], sizeof fields[nfields], "%s", field);
            ++nfields;
        }

        /* Rows beyond nfields hold stale data from earlier lines. */
        if (nfields <= LIFE_EXPECTANCY) {
            fprintf(stderr, "Skipping line %d: too few fields\n", lineNo);
            continue;
        }

        char *popEnd = NULL;
        char *lifeEnd = NULL;
        long population = strtol(fields[POPULATION], &popEnd, 10);
        float lifeExpectancy = strtof(fields[LIFE_EXPECTANCY], &lifeEnd);
        if (popEnd == fields[POPULATION] || lifeEnd == fields[LIFE_EXPECTANCY]) {
            fprintf(stderr, "Skipping line %d: bad number\n", lineNo);
            continue;
        }

        country_t *country = calloc(1, sizeof *country);
        if (country == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(-1);
        }

        /* Bounded copies: a field may be longer than code[] or name[]. */
        snprintf(country->code, sizeof country->code, "%s", fields[COUNTRY_CODE]);
        snprintf(country->name, sizeof country->name, "%s", fields[COUNTRY_NAME]);
        country->population = (int)population;
        country->lifeExpectancy = lifeExpectancy;

        /* Append at the tail so the list keeps file order. */
        if (countryRecords == NULL)
            countryRecords = country;
        else
            tail->next = country;
        tail = country;
        ++totalCountries;
    }
    fclose(fp);

    printf("Total countries: %d\n", totalCountries);

    /* Print each record, then free it once its successor has been saved. */
    country_t *country = countryRecords;
    while (country != NULL) {
        printf("%3s %30s  Population: %7d Life Expectancy: %5.2f\n",
            country->code, country->name, country->population, country->lifeExpectancy);
        country_t *done = country;
        country = country->next;
        free(done);
    }
    countryRecords = NULL;  /* every node is gone; do not leave the head dangling */

    printf("Done\n");
    exit(0);
}