如何使用fscanf解析csv文件?

时间:2019-04-12 02:51:03

标签: c file fwrite

我需要使用 fscanf 解析一个 CSV 文件。我的文件内容如下:

nroInscricao,nota,data,cidade,nomeEscola
13893,353.9,26/11/2016,,FRANCISCO RIBEIRO CARRIL
13595,472.2,,Salgueiro,ALFREDO GUEDES
13894,614.4,28/11/2016,Recife,JOAO DE MOURA GUIMARAES
13880,403.2,29/11/2016,Fortaleza,ANTONIO DIAS PASCHOAL PR
13881,373.7,,Sao Jose da Tapera,DONIZETTI TAVARES DE LIM
13882,394.8,01/12/2016,Sao Bernardo do Cam,JUSTINO GOMES DE CASTRO 

我需要读取每个字段。结构体定义如下:

/*
 * One record of the CSV file. Members are declared in the same order as the
 * columns of the file: nroInscricao, nota, data, cidade, nomeEscola.
 */
struct RegDados {
    int    numberIns;   /* nroInscricao column */
    double grade;       /* nota column */
    /* NOTE(review): a date such as "26/11/2016" is 10 characters long, so
       this array has no room left for a terminating NUL. */
    char   data[10];
    char   city[50];    /* cidade column */
    char   name[50];    /* nomeEscola column */
};

typedef struct RegDados RegDados;

它与文件上显示的顺序相同。

有什么想法吗?

3 个答案:

答案 0 :(得分:1)

应该有一个更简单的方法来做到这一点。不幸的是,scanf 和 strtok 都不能很好地处理空白字段。我将每个数组的大小都增加了 1,以容纳字符串末尾的空终止符。

如果我没有写错,它能够处理不规范的输入:缺少字段、最后一行缺少换行符,或者字段数据过长。我使用了 getline 来动态分配用于容纳数据的缓冲区,而不是依赖固定大小的缓冲区并寄希望于它足够大;但是请注意,这个函数的可移植性并不是最好的。

#define _POSIX_C_SOURCE 200809L  /* expose getline() when compiling with strict -std=c11 */

#include <errno.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * One parsed CSV row. Each character array is one byte larger than the widest
 * field it is meant to hold, so a terminating NUL always fits.
 */
struct RegDados {
    int    numberIns;   /* 1st column */
    double grade;       /* 2nd column */
    char   data[11];    /* 3rd column: up to 10 characters + NUL */
    char   city[51];    /* 4th column: up to 50 characters + NUL */
    char   name[51];    /* 5th column: up to 50 characters + NUL */
};

typedef struct RegDados RegDados;

/* Smaller of two values. Function-like macro: an argument may be evaluated
   twice, so do not pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Copy the src_len-byte field starting at src into dst and NUL-terminate it.
 * Fields longer than dst_size - 1 bytes are truncated. dst_size must be > 0.
 */
static void copy_field_truncated (char *dst, size_t dst_size,
                                  const char *src, size_t src_len) {
    size_t count = src_len < dst_size - 1 ? src_len : dst_size - 1;
    memcpy (dst, src, count);
    dst[count] = '\0';
}

/*
 * Read CSV rows from stdin and print each row as a RegDados record.
 *
 * Every input line is parsed as a record, including a header line if the
 * input has one. Missing or empty fields leave the corresponding member
 * zeroed. A numeric field that is not entirely a valid number, or that is
 * out of range, is reported on stderr and left as 0. Over-long text fields
 * are truncated to the size of the destination array.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if reading stdin failed.
 */
int main (int argc, char **argv) {
    (void) argc;    /* the command line is not used */
    (void) argv;

    RegDados record;
    char *line = NULL;      /* buffer managed by getline(), freed below */
    size_t length = 0;

    /* getline() reports end-of-file and errors with -1, which is not
       guaranteed to be the same value as EOF. */
    while (getline (&line, &length, stdin) != -1) {
        memset (&record, 0, sizeof (record));
        char *value = line;
        for (int field = 0; field < 5; field++) {
            /* A field ends at the next comma, at the newline, or at the end
               of the string when the last line has no newline. */
            char *endfield = value + strcspn (value, ",\n");
            size_t field_length = (size_t) (endfield - value);
            char *endptr;

            switch (field) {
                case 0:
                    if (field_length > 0) {
                        errno = 0;  /* strtol only sets errno, never clears it */
                        long number = strtol (value, &endptr, 10);
                        /* the conversion must consume the whole field */
                        if (endptr != endfield || errno != 0
                                || number < INT_MIN || number > INT_MAX)
                            fputs ("warning: invalid number field\n", stderr);
                        else
                            record.numberIns = (int) number;
                    }
                    break;
                case 1:
                    if (field_length > 0) {
                        errno = 0;
                        double grade = strtod (value, &endptr);
                        if (endptr != endfield || errno != 0)
                            fputs ("warning: invalid grade field\n", stderr);
                        else
                            record.grade = grade;
                    }
                    break;
                case 2:
                    copy_field_truncated (record.data, sizeof record.data,
                                          value, field_length);
                    break;
                case 3:
                    copy_field_truncated (record.city, sizeof record.city,
                                          value, field_length);
                    break;
                case 4:
                    copy_field_truncated (record.name, sizeof record.name,
                                          value, field_length);
                    break;
                default:
                    break;
            }
            if (*endfield != ',') break;    /* newline or NUL: end of the row */
            value = endfield + 1;
        }
        printf ("Number: %d\n  Grade: %lf\n  date: %s\n  city: %s\n  who: %s\n",
            record.numberIns,
            record.grade,
            record.data,
            record.city,
            record.name);
    }

    /* tell a read error apart from a normal end of input */
    int status = EXIT_SUCCESS;
    if (ferror (stdin)) {
        perror ("error reading input");
        status = EXIT_FAILURE;
    }
    free (line);
    return status;
}

最好将其拆分为若干函数(例如,一个 parse_student_record 函数,接受一个字符串,成功时返回一条新分配的记录,可能还需要其他辅助函数);另外,字符串字段的长度应该用常量定义,字段顺序应该用枚举表示,而不是把这些数值硬编码在代码里。如果这是一个真实的项目,我会使用 strtol 和 strtof 而不是 atoi 和 atof,并利用这些函数提供的 endptr 做一些验证(期望 endptr == endfield,否则字段内容无效)。希望这足以帮助您入门。

答案 1 :(得分:0)

要在解析字段的同时正确处理空字段,一种灵活而健壮的方法是使用简单的 start 和 end 两个指针,再配合 strcspn(其 reject 参数为 ",\n")。这样,您就可以让指针沿着每一行逐个字段地移动,并使用 switch() 语句和指针算术把各个字段存入对应的结构体成员。

在一个示例中分步进行,您可以执行以下操作:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>

/* Sentinel stored in a numeric member whose CSV field is empty. */
#define EMPTY   (-1)
/* Number of members in struct RegDados. */
#define NMEMB   5
/* Size of the date string buffer; 10 bytes would leave no room for the NUL. */
#define DATSZ   12
/* Size of the city and name buffers. */
#define CNSZ    50
/* Maximum number of records (CSV lines) that are stored. */
#define NLINE   128
/* Size of the line read buffer, in chars. */
#define MAXC    1024

/* One CSV record; the text buffers are sized by DATSZ and CNSZ. */
struct RegDados {
    int    numberIns;       /* 1st field, integer */
    double grade;           /* 2nd field, floating point */
    char   data[DATSZ];     /* 3rd field, date text */
    char   city[CNSZ];      /* 4th field, city text */
    char   name[CNSZ];      /* 5th field, name text */
};

typedef struct RegDados RegDados;

/*
 * Copy the len-byte field starting at src into dst as a NUL-terminated
 * string. Empty fields and fields that do not fit in dst_size bytes
 * (terminator included) leave dst unchanged.
 */
static void store_text_field (char *dst, size_t dst_size,
                              const char *src, size_t len) {
    if (len == 0 || len >= dst_size)
        return;
    memcpy (dst, src, len);
    dst[len] = 0;
}

/*
 * Parse a CSV file into an array of RegDados and print the records.
 *
 * The file named by the first argument is read (stdin when no argument is
 * given). The first line is discarded as a header; at most NLINE further
 * lines are parsed. Empty or invalid numeric fields are stored as EMPTY,
 * empty or over-long text fields as empty strings. Conversion problems are
 * reported on stderr.
 *
 * Returns 0 on success and 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {

    char buf[MAXC];     /* read buffer */
    size_t ndx = 0;     /* number of records stored so far */
    /* zero everything so that all text members start as empty strings */
    RegDados reg[NLINE] = {{ 0 }};
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    if (!fgets (buf, MAXC, fp)) {   /* read/discard header line */
        /* empty input: there is no header and there are no records */
        if (fp != stdin) fclose (fp);
        return 0;
    }

    /* read each line up to a max of NLINE lines */
    while (ndx < NLINE && fgets (buf, MAXC, fp)) {
        size_t n = 0;   /* current field number */
        char *p = buf;  /* beginning pointer in field */

        /* every record, not only the first, starts with the sentinel */
        reg[ndx].numberIns = EMPTY;
        reg[ndx].grade = EMPTY;

        while (*p && *p != '\n') {  /* for each field */
            char *ep = p + strcspn (p, ",\n");  /* end pointer in field */
            size_t len = (size_t)(ep - p);      /* field length in chars */

            switch (n) {    /* switch on field number */
                case 0:
                    if (len) {  /* non-empty */
                        char *endptr;   /* endptr for strtol */
                        errno = 0;      /* strtol never clears errno itself */
                        long tmp = strtol (p, &endptr, 10); /* convert */
                        if (p == endptr)    /* validate */
                            fputs ("error: no digits, numberIns.\n", stderr);
                        else if (endptr != ep)  /* must consume whole field */
                            fputs ("error: trailing characters, numberIns.\n",
                                   stderr);
                        else if (errno)
                            fputs ("error: over/underflow.\n", stderr);
                        else if (tmp < INT_MIN || INT_MAX < tmp)
                            fputs ("error: exceeds range of int.\n", stderr);
                        else
                            reg[ndx].numberIns = (int)tmp;
                    }
                    break;
                case 1:
                    if (len) {  /* non-empty */
                        char *endptr;   /* endptr for strtod */
                        errno = 0;
                        double tmp = strtod (p, &endptr);   /* convert */
                        if (p == endptr)    /* validate */
                            fputs ("error: no digits, grade.\n", stderr);
                        else if (endptr != ep)  /* must consume whole field */
                            fputs ("error: trailing characters, grade.\n",
                                   stderr);
                        else if (errno)
                            fputs ("error: over/underflow.\n", stderr);
                        else
                            reg[ndx].grade = tmp;
                    }
                    break;
                case 2:
                    store_text_field (reg[ndx].data, sizeof reg[ndx].data,
                                      p, len);
                    break;
                case 3:
                    store_text_field (reg[ndx].city, sizeof reg[ndx].city,
                                      p, len);
                    break;
                case 4:
                    store_text_field (reg[ndx].name, sizeof reg[ndx].name,
                                      p, len);
                    break;
                default:    /* more fields than struct members */
                    fputs ("error: you shouldn't get here.\n", stderr);
                    break;
            }
            /* Only step over a comma. Stepping over the newline, or over
               the NUL of a last line without newline, would leave the
               string. */
            if (*ep != ',')
                break;
            p = ep + 1;     /* set p to beginning of next field */
            n++;    /* increment field count */
        }
        ndx++;      /* increment index */
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    for (size_t i = 0; i < ndx; i++)    /* output data */
        printf ("%6d %6.1f  %-12s %-20s %s\n", reg[i].numberIns,
                reg[i].grade, reg[i].data, reg[i].city, reg[i].name);

    return 0;
}

示例输入文件

$ cat dat/csvwempty.csv
nroInscricao,nota,data,cidade,nomeEscola
13893,353.9,26/11/2016,,FRANCISCO RIBEIRO CARRIL
13595,472.2,,Salgueiro,ALFREDO GUEDES
13894,614.4,28/11/2016,Recife,JOAO DE MOURA GUIMARAES
13880,403.2,29/11/2016,Fortaleza,ANTONIO DIAS PASCHOAL PR
13881,373.7,,Sao Jose da Tapera,DONIZETTI TAVARES DE LIM
13882,394.8,01/12/2016,Sao Bernardo do Cam,JUSTINO GOMES DE CASTRO

使用/输出示例

$ ./bin/parsecsv dat/csvwempty.csv
 13893  353.9  26/11/2016                        FRANCISCO RIBEIRO CARRIL
 13595  472.2               Salgueiro            ALFREDO GUEDES
 13894  614.4  28/11/2016   Recife               JOAO DE MOURA GUIMARAES
 13880  403.2  29/11/2016   Fortaleza            ANTONIO DIAS PASCHOAL PR
 13881  373.7               Sao Jose da Tapera   DONIZETTI TAVARES DE LIM
 13882  394.8  01/12/2016   Sao Bernardo do Cam  JUSTINO GOMES DE CASTRO

仔细检查一下,如果还有其他问题,请告诉我。

答案 2 :(得分:0)

strsep将处理空白字段:

#include <string.h>
#include <stdio.h>

/*
 * Read lines from stdin and print every comma-separated token of each line
 * together with its position. strsep() returns an empty string for an empty
 * field, so empty fields keep their position.
 *
 * A line longer than the buffer is read in several pieces, and each piece is
 * tokenized as if it were a line of its own.
 */
int main(int argc, char * argv[]){

    char line[1024];
    char *tok;
    char *copy;

    (void)argc;     /* the command line is not used */
    (void)argv;

    /* fgets() bounds the read to the buffer; gets() cannot do that and was
       removed from the language in C11 */
    while(fgets(line, sizeof line, stdin)){
        line[strcspn(line, "\n")] = '\0';   /* fgets() keeps the newline; drop it */
        copy=line;  /* cursor that strsep() advances through the buffer */
        int pos=0;
        while((tok=strsep(&copy,","))!=NULL){
            printf("Token is: %s. Position is: %d\n",tok,pos);
            pos++;
            /* Do stuff here based on position and null/string value */
        }
        /* nothing to free: the tokens point into the stack buffer `line` */
    }
    return 0;
}