所以我成功读取了这样的CSV文件(该CSV文件有4列,分别带有x,y,u,v)-
/*
 * Read rows of four numbers from fp and append each one to dataset,
 * growing the array by exactly one element per row.
 *
 * The targets are double fields, so the conversions must be %lf; %f tells
 * fscanf to store a float and is undefined behaviour with a double pointer.
 * NOTE(review): this format has no commas between the fields - confirm it
 * matches the separators actually used in the input file.
 */
while (4 == fscanf(fp, "%lf %lf %lf %lf\n", &d.xvalue, &d.yvalue, &d.uvalue, &d.vvalue))
{
    /* Grow through a temporary so the old block is still reachable on failure. */
    void *grown = realloc(dataset, sizeof(*dataset) * (total + 1));
    if (grown == NULL)
    {
        fprintf(stderr, "error allocating memory!\n");
        free(dataset);
        exit(EXIT_FAILURE);
    }
    dataset = grown;
    dataset[total] = d;
    total++;
}
结构-
/* One parsed row of the input file: the four values read per line, in file order. */
typedef struct
{
    double xvalue; /* first value on the line (x)  */
    double yvalue; /* second value on the line (y) */
    double uvalue; /* third value on the line (u)  */
    double vvalue; /* fourth value on the line (v) */
} flow_data;
但是对于大型CSV文件,它执行了超过100,000次的重新分配。我尝试用类似下面的代码来减少重新分配的次数,但现在它根本无法读取数据。
// Reading flow_data.csv
FILE* fp = fopen(flow_file, "r");
// Checking if the file has been read succesfully
if( fp == NULL)
{
perror("Error opening file");
exit(1);
}
char buf[500];
fgets(buf, sizeof(buf), fp); // Skip the first line
int total = 0;
int buf_size = INITIAL_SIZE;
flow_data d;
flow_data* dataset = (flow_data*)malloc(sizeof(flow_data) * buf_size);
while(4 == fscanf(fp, "%lf, %lf, %lf, %lf\n", &d.xvalue, &d.yvalue, &d.uvalue, &d.vvalue))
{
if (d.xvalue >= 0)
{
if (total >= buf_size) {
buf_size = buf_size * 2;
dataset = realloc(dataset,buf_size * sizeof(flow_data));
if (dataset == NULL) {
printf("error allocating memory!\n");
exit(EXIT_FAILURE);
}
}
dataset[total] = d;
total++;
}
}
仅当X值大于20时才读取该行。我做错了什么?
答案 0 :(得分:2)
以下是大小加倍代码的一个实现,它测试的条件是 d.xvalue >= 20:
#include <stdio.h>
#include <stdlib.h>
/*
 * A single data row: four double values stored in the order in which
 * they are read from a line of the input file (x, y, u, v).
 */
typedef struct {
    double xvalue;  /* x field of the row */
    double yvalue;  /* y field of the row */
    double uvalue;  /* u field of the row */
    double vvalue;  /* v field of the row */
} flow_data;
static void print_dataset(size_t size, flow_data dataset[size]);
/*
 * Read flow_data.csv, keep every row whose x value is at least 20 in a
 * dynamically grown array, and print the kept rows.
 *
 * The first line of the file is skipped without being parsed. Reading stops
 * at the first line that does not yield four comma-separated numbers.
 * Returns 0 on success; exits with EXIT_FAILURE if the file cannot be
 * opened or memory cannot be allocated.
 */
int main(void)
{
    char flow_file[] = "flow_data.csv";
    FILE *fp = fopen(flow_file, "r");
    if (fp == NULL)
    {
        perror(flow_file);
        exit(EXIT_FAILURE);
    }

    char buf[500];
    fgets(buf, sizeof(buf), fp); // Skip the first line

    size_t total = 0;          /* rows stored so far */
    size_t buf_size = 0;       /* rows the current allocation can hold */
    flow_data *dataset = NULL; /* realloc(NULL, n) acts like malloc(n) */
    flow_data d;

    while (4 == fscanf(fp, "%lf, %lf, %lf, %lf\n", &d.xvalue, &d.yvalue, &d.uvalue, &d.vvalue))
    {
        if (d.xvalue >= 20)
        {
            if (total >= buf_size)
            {
                /* "+ 2" lets the capacity grow away from the initial 0. */
                size_t new_size = buf_size * 2 + 2;
                flow_data *new_data = realloc(dataset, new_size * sizeof(*dataset));
                if (new_data == NULL)
                {
                    fprintf(stderr, "error allocating memory!\n");
                    free(dataset);
                    fclose(fp);
                    exit(EXIT_FAILURE);
                }
                dataset = new_data;
                buf_size = new_size;
            }
            dataset[total++] = d;
        }
    }

    if (total > 0)
    {
        /*
         * Give back the unused tail of the buffer. A shrinking realloc is
         * very unlikely to fail, but if it does the original block is still
         * valid, so keep using it instead of overwriting the pointer.
         * The call is skipped for total == 0 because a zero-size realloc
         * has no portable meaning.
         */
        flow_data *trimmed = realloc(dataset, total * sizeof(*dataset));
        if (trimmed != NULL)
        {
            dataset = trimmed;
        }
        print_dataset(total, dataset);
    }

    free(dataset);
    fclose(fp);
    return 0;
}
/*
 * Print the first `size` entries of `dataset` to standard output, one per
 * line, as "[index]: (x, y) (u, v)" with each value formatted by %f.
 */
static void print_dataset(size_t size, flow_data dataset[size])
{
    size_t idx = 0;
    while (idx < size)
    {
        const flow_data *row = &dataset[idx];
        printf("[%zu]: (%f, %f) (%f, %f)\n", idx,
               row->xvalue, row->yvalue,
               row->uvalue, row->vvalue);
        idx++;
    }
}
给出这样的示例数据文件(40行):
22.3,22.3,25.3,24.8
26.5,25.3,21.5,11.5
17.5,12.6,12.8,17.6
17.2,28.5,17.0,23.7
29.6,29.1,28.5,14.2
20.1,15.5,21.5,16.4
26.9,29.0,25.6,13.1
16.7,29.0,26.7,16.4
22.8,14.7,17.4,12.5
27.2,13.8,26.6,25.0
26.7,13.1,26.2,19.3
20.8,25.3,28.8,10.6
27.0,27.0,16.7,10.4
18.4,23.3,16.9,25.9
27.7,17.3,29.2,26.8
14.2,27.6,16.2,10.1
21.7,11.4,21.2,12.6
20.6,12.4,25.5,15.5
29.0,11.4,12.1,18.1
25.7,22.3,24.4,12.9
10.6,22.1,25.6,18.7
25.5,25.1,25.0,29.7
27.3,11.4,16.1,11.3
14.7,11.1,24.0,15.1
29.7,12.1,11.1,22.5
10.5,13.7,22.6,22.6
25.1,16.3,21.2,13.8
17.1,11.5,12.0,24.6
17.5,24.1,19.3,24.8
12.5,29.8,19.7,10.6
11.2,19.6,17.8,24.5
22.4,12.9,10.3,25.8
17.7,10.1,27.9,21.4
18.5,11.7,16.5,24.5
17.0,26.0,20.2,29.6
20.1,11.8,22.3,20.7
25.1,14.1,25.3,13.5
22.6,14.1,26.4,16.2
13.0,29.2,14.7,19.6
17.3,12.6,28.2,15.1
它产生输出:
[0]: (26.500000, 25.300000) (21.500000, 11.500000)
[1]: (29.600000, 29.100000) (28.500000, 14.200000)
[2]: (20.100000, 15.500000) (21.500000, 16.400000)
[3]: (26.900000, 29.000000) (25.600000, 13.100000)
[4]: (22.800000, 14.700000) (17.400000, 12.500000)
[5]: (27.200000, 13.800000) (26.600000, 25.000000)
[6]: (26.700000, 13.100000) (26.200000, 19.300000)
[7]: (20.800000, 25.300000) (28.800000, 10.600000)
[8]: (27.000000, 27.000000) (16.700000, 10.400000)
[9]: (27.700000, 17.300000) (29.200000, 26.800000)
[10]: (21.700000, 11.400000) (21.200000, 12.600000)
[11]: (20.600000, 12.400000) (25.500000, 15.500000)
[12]: (29.000000, 11.400000) (12.100000, 18.100000)
[13]: (25.700000, 22.300000) (24.400000, 12.900000)
[14]: (25.500000, 25.100000) (25.000000, 29.700000)
[15]: (27.300000, 11.400000) (16.100000, 11.300000)
[16]: (29.700000, 12.100000) (11.100000, 22.500000)
[17]: (25.100000, 16.300000) (21.200000, 13.800000)
[18]: (22.400000, 12.900000) (10.300000, 25.800000)
[19]: (20.100000, 11.800000) (22.300000, 20.700000)
[20]: (25.100000, 14.100000) (25.300000, 13.500000)
[21]: (22.600000, 14.100000) (26.400000, 16.200000)
在一台运行macOS 10.13.6 High Sierra的Mac上,使用GCC 8.2.0和Valgrind 3.14.0.GIT进行测试时,它可以在Valgrind下正常运行(这里的Valgrind是由Git存储库的副本构建的版本,而不是正式发布的版本)。
==76412== HEAP SUMMARY:
==76412== in use at exit: 23,135 bytes in 168 blocks
==76412== total heap usage: 195 allocs, 27 frees, 99,487 bytes allocated
==76412==
==76412== LEAK SUMMARY:
==76412== definitely lost: 0 bytes in 0 blocks
==76412== indirectly lost: 0 bytes in 0 blocks
==76412== possibly lost: 0 bytes in 0 blocks
==76412== still reachable: 0 bytes in 0 blocks
==76412== suppressed: 23,135 bytes in 168 blocks