我需要使用fscanf解析一个csv文件。 我的文件就像:
nroInscricao,nota,data,cidade,nomeEscola
13893,353.9,26/11/2016,,FRANCISCO RIBEIRO CARRIL
13595,472.2,,Salgueiro,ALFREDO GUEDES
13894,614.4,28/11/2016,Recife,JOAO DE MOURA GUIMARAES
13880,403.2,29/11/2016,Fortaleza,ANTONIO DIAS PASCHOAL PR
13881,373.7,,Sao Jose da Tapera,DONIZETTI TAVARES DE LIM
13882,394.8,01/12/2016,Sao Bernardo do Cam,JUSTINO GOMES DE CASTRO
我需要读取每个字段。结构体如下:
/* One record of the CSV file; members follow the column order of the file. */
typedef struct RegDados{
int numberIns; /* column nroInscricao */
double grade; /* column nota */
/* NOTE(review): the sample dates ("26/11/2016") are 10 characters long, so
 * 10 bytes leave no room for a terminating NUL if this is used as a C string. */
char data[10]; /* column data */
char city[50]; /* column cidade */
char name[50]; /* column nomeEscola */
} RegDados;
它与文件上显示的顺序相同。
有什么想法吗?
答案 0 :(得分:1)
应该有更简单的方法来做到这一点。不幸的是,scanf 和 strtok 都不能很好地处理空字段。我把每个字符数组的大小都增加了 1,以容纳字符串末尾的空终止符。
如果我没写错,它可以处理有问题的输入:缺少字段、最后一行缺少换行符或数据过长。我使用 getline 来动态分配容纳数据的缓冲区,而不是依赖固定大小的缓冲区并寄希望于它够用;但请注意,这个函数的可移植性不是最好的。
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <stdlib.h>
/* One parsed CSV record. Each character array is one byte larger than the
 * longest text the parser copies into it (10 / 50 / 50 bytes), leaving room
 * for the terminating NUL. */
typedef struct RegDados{
int numberIns; /* field 0 of a line */
double grade; /* field 1 of a line */
char data[11]; /* field 2: up to 10 characters + NUL */
char city[51]; /* field 3: up to 50 characters + NUL */
char name[51]; /* field 4: up to 50 characters + NUL */
} RegDados;
/* Smaller of x and y. Function-like macro: each argument may be evaluated
 * twice, so do not pass expressions with side effects. */
#define MIN(x,y) ((x) < (y) ? (x) : (y))
/* Copy the len-byte field starting at src into dst (capacity cap bytes),
 * truncating to cap - 1 bytes if necessary, and always NUL-terminate it. */
static void copy_field (char *dst, size_t cap, const char *src, size_t len) {
    if (len >= cap)
        len = cap - 1;
    memcpy (dst, src, len);
    dst[len] = '\0';
}

/*
 * Read CSV lines from stdin and print the five fields of each line.
 *
 * Every line is split on ',' without modifying it, so empty fields are
 * preserved. Missing trailing fields are left zeroed, over-long text fields
 * are truncated to the size of their destination, and a final line without
 * a newline is handled. Every line is treated as a record, including a
 * header line if one is present.
 *
 * Returns 0.
 */
int main (int argc, char **argv) {
    (void) argc;    /* command-line arguments are not used */
    (void) argv;

    RegDados record;
    char *line = NULL;      /* buffer owned by getline(), released below */
    size_t length = 0;

    /* getline() signals end-of-file and errors with -1 (not EOF). */
    while (getline (&line, &length, stdin) != -1) {
        memset (&record, 0, sizeof (record));
        char *value = line;

        for (int field = 0; field < 5; field++) {
            /* A field ends at the next comma, or at the newline / terminator
             * when it is the last field on the line. */
            char *endfield = value + strcspn (value, ",\n");
            size_t field_length = (size_t) (endfield - value);

            switch (field) {
            case 0:
                /* strtol/strtod stop at the first character that is not part
                 * of the number (here the field separator). */
                record.numberIns = (int) strtol (value, NULL, 10);
                break;
            case 1:
                record.grade = strtod (value, NULL);
                break;
            case 2:
                copy_field (record.data, sizeof record.data, value, field_length);
                break;
            case 3:
                copy_field (record.city, sizeof record.city, value, field_length);
                break;
            case 4:
                copy_field (record.name, sizeof record.name, value, field_length);
                break;
            default:
                break;
            }

            if (*endfield != ',')   /* newline or end of string: line is done */
                break;
            value = endfield + 1;   /* step over the comma */
        }

        printf ("Number: %d\n Grade: %lf\n date: %s\n city: %s\n who: %s\n",
                record.numberIns,
                record.grade,
                record.data,
                record.city,
                record.name);
    }

    free (line);
    return 0;
}
最好将其拆分为多个函数(例如,一个 parse_student_record 函数接受一个字符串,并在成功时返回一个新分配的记录,可能还需要其他辅助函数);我还应该为字符串字段长度定义常量、为字段顺序定义枚举,而不是把这些值硬编码。如果这是一个真实的项目,我会使用 strtol 和 strtof 而不是 atoi 和 atof,并利用这些函数提供的 endptr 做一些验证(期望 endptr == endfield,否则内容无效),但希望这可以帮助您入门。
答案 1 :(得分:0)
在处理空字段时,解析字段的一种灵活而稳健的方法是使用简单的 start 和 end 指针以及 strcspn,其中 reject 参数为 ",\n"。这样,您就可以在每一行中逐字段移动这对指针,并使用 switch() 语句和指针算术把各字段存入相应的结构成员。
在一个示例中分步进行,您可以执行以下操作:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#define EMPTY -1 /* define value for number if empty-fields */
#define NMEMB 5 /* number of members in struct */
#define DATSZ 12 /* 10 not large enough for data string */
#define CNSZ 50 /* chars in city, name */
#define NLINE 128 /* number of struct (csv lines) */
#define MAXC 1024 /* number of chars in read buffer */
typedef struct RegDados {
int numberIns;
double grade;
char data[DATSZ];
char city[CNSZ];
char name[CNSZ];
} RegDados;
int main (int argc, char **argv) {
char buf[MAXC]; /* read buffer */
size_t ndx = 0; /* struct index */
/* initialize array of struct */
RegDados reg[NLINE] = {{ .numberIns = EMPTY, .grade = EMPTY }};
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
fgets (buf, MAXC, fp); /* read/discard header line */
/* read each line up to a max of NLINE lines */
while (ndx < NLINE && fgets (buf, MAXC, fp)) {
size_t n = 0; /* current field number */
char *p = buf, /* beginning pointer in field */
*ep = p + strcspn (p, ",\n"); /* end pointer in field */
while (*p && *p != '\n') { /* for each field */
switch (n) { /* switch on field number */
case 0:
if (ep > p && isdigit (*p)) { /* non-empty w/digit */
char *endptr; /* endptr for strtol */
long tmp = strtol (p, &endptr, 0); /* convert */
if (p == endptr) /* validate */
fputs ("error: no digits, numberIns.\n", stderr);
else if (errno)
fputs ("error: over/underflow.\n", stderr);
else if (tmp < INT_MIN || INT_MAX < tmp)
fputs ("error: exceeds range of int.\n", stderr);
else
reg[ndx].numberIns = tmp;
}
break;
case 1:
if (ep > p && isdigit (*p)) { /* non-empty w/digit */
char *endptr; /* endptr for strtod */
double tmp = strtod (p, &endptr); /* convert */
if (p == endptr) /* validate */
fputs ("error: no digits, numberIns.\n", stderr);
else if (errno)
fputs ("error: over/underflow.\n", stderr);
else
reg[ndx].grade = tmp;
}
break;
case 2:
if (ep > p && ep - p < DATSZ) { /* chars, will it fit? */
memcpy (reg[ndx].data, p, ep - p); /* copy chars */
reg[ndx].data[ep - p] = 0; /* nul-terminate */
}
break;
case 3:
if (ep > p && ep - p < CNSZ) { /* chars, will it fit? */
memcpy (reg[ndx].city, p, ep - p); /* copy chars */
reg[ndx].city[ep - p] = 0; /* nul-terminate */
}
break;
case 4:
if (ep > p && ep - p < CNSZ) { /* chars, will it fit? */
memcpy (reg[ndx].name, p, ep - p); /* copy chars */
reg[ndx].name[ep - p] = 0; /* nul-terminate */
}
break;
default: /* set default to indicate error */
fputs ("error: you shouldn't get here.\n", stderr);
break;
}
p = ++ep; /* set p to beginning of next field */
ep = p + strcspn (p, ",\n"); /* find end of field */
n++; /* increment field count */
}
ndx++; /* increment index */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
for (size_t i = 0; i < ndx; i++) /* output data */
printf ("%6d %6.1f %-12s %-20s %s\n", reg[i].numberIns,
reg[i].grade, reg[i].data, reg[i].city, reg[i].name);
return 0;
}
示例输入文件
$ cat dat/csvwempty.csv
nroInscricao,nota,data,cidade,nomeEscola
13893,353.9,26/11/2016,,FRANCISCO RIBEIRO CARRIL
13595,472.2,,Salgueiro,ALFREDO GUEDES
13894,614.4,28/11/2016,Recife,JOAO DE MOURA GUIMARAES
13880,403.2,29/11/2016,Fortaleza,ANTONIO DIAS PASCHOAL PR
13881,373.7,,Sao Jose da Tapera,DONIZETTI TAVARES DE LIM
13882,394.8,01/12/2016,Sao Bernardo do Cam,JUSTINO GOMES DE CASTRO
使用/输出示例
$ ./bin/parsecsv dat/csvwempty.csv
13893 353.9 26/11/2016 FRANCISCO RIBEIRO CARRIL
13595 472.2 Salgueiro ALFREDO GUEDES
13894 614.4 28/11/2016 Recife JOAO DE MOURA GUIMARAES
13880 403.2 29/11/2016 Fortaleza ANTONIO DIAS PASCHOAL PR
13881 373.7 Sao Jose da Tapera DONIZETTI TAVARES DE LIM
13882 394.8 01/12/2016 Sao Bernardo do Cam JUSTINO GOMES DE CASTRO
仔细检查一下,如果还有其他问题,请告诉我。
答案 2 :(得分:0)
strsep 可以处理空字段:
#include <string.h>
#include <stdio.h>
/*
 * Read lines from stdin and print every comma-separated token together with
 * its position on the line. strsep() returns an empty string for an empty
 * field, so blank fields keep their position.
 *
 * Returns 0.
 */
int main(int argc, char * argv[]){
    (void)argc;     /* command-line arguments are not used */
    (void)argv;

    char line[1024];

    /* fgets() is bounded by the buffer size; gets() was removed in C11. */
    while (fgets(line, sizeof line, stdin)) {
        line[strcspn(line, "\n")] = '\0';   /* drop the newline fgets() keeps */

        /* strsep() advances this cursor and overwrites each delimiter with
         * a NUL. It points into line, so there is nothing to free. */
        char *copy = line;
        char *tok;
        int pos = 0;

        while ((tok = strsep(&copy, ",")) != NULL) {
            printf("Token is: %s. Position is: %d\n", tok, pos);
            pos++;
            //Do stuff here based on position and null/string value
        }
    }
    return 0;
}