读取.txt文件并将数据保存为C中的矩阵

时间:2015-12-30 15:02:20

标签: c matrix

我想读取一个.txt文件,并把其中的数据保存到C语言的矩阵(二维数组)中。

dist.txt is the following:
Distance    Amsterdam   Antwerp Athens  Barcelona   Berlin
Amsterdam   -   160 3082    1639    649
Antwerp 160 -   2766    1465    723
Athens  3082    2766    -   3312    2552
Barcelona   1639    1465    3312    -   1899
Berlin  649 723 2552    1899    -

事实上它有更多的城市,但没关系。

我想读取这个文件并把距离保存下来。我尝试了以下代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>

#define rows 6
#define cols 6

/*
 * Reads dist.txt line by line, splits each line on blanks and tabs, converts
 * every token with strtod() and stores the result in a rows x cols matrix,
 * which is then printed tab-separated.
 *
 * Tokens that are not numbers (the header line, the leading city name and the
 * "-" on the diagonal) convert to 0. Cells that are never filled stay 0.
 * Blank lines are skipped; lines after the first `rows` non-blank ones and
 * fields after the first `cols` are ignored.
 *
 * Returns 0 on success, -1 if memory cannot be allocated or the file cannot
 * be opened.
 */
int main(void)
{
    FILE *nansa = NULL;
    char buffer[4096];
    const char delims[] = " \t\r\n";
    int row = 0, i, j;
    int status = -1;

    double **mat = malloc( rows*sizeof(*mat) );
    if (mat == NULL)
    {
        fprintf(stdout, "Error\n");
        return -1;
    }
    for(i=0; i<rows; i++)
    {
        mat[i] = NULL;      /* lets the cleanup code free every slot safely */
    }
    for(i=0; i<rows; i++)
    {
        mat[i] = malloc( cols*sizeof(*mat[i]) );
        if (mat[i] == NULL)
        {
            fprintf(stdout, "Error\n");
            goto cleanup;
        }
        for(j=0; j<cols; j++)
        {
            mat[i][j] = 0.0;    /* never print an indeterminate value */
        }
    }

    if ((nansa=fopen("dist.txt","r"))==NULL)
    {
        fprintf(stdout, "Error\n");
        goto cleanup;
    }

    /* Test the read itself instead of feof(): feof() only becomes true
       after a read has already failed. The row bound keeps a file with
       more lines than expected from writing past the matrix. */
    while(row<rows && fgets(buffer, sizeof buffer, nansa)!=NULL)
    {
        int field = 0;
        char *result = strtok(buffer, delims);

        if (result == NULL)
        {
            continue;           /* blank line */
        }
        while(result != NULL && field<cols)
        {
            mat[row][field] = strtod(result, NULL);
            result = strtok(NULL, delims);
            field++;
        }
        ++row;
    }
    fclose(nansa);

    for(i=0; i<rows; i++)
    {
        for(j=0; j<cols; j++)
        {
            printf("%g%s", mat[i][j], j<cols-1 ? "\t" : "\n");
        }
    }
    status = 0;

cleanup:
    /* Each row is released exactly once, outside the column loop. */
    for(i=0; i<rows; i++)
    {
        free(mat[i]);
    }
    free(mat);
    return status;
}

但我得不到想要的结果……而且我不知道如何把名称和距离(字符串和整数)分开。如果有人能帮助我,我将非常感激!

6 个答案:

答案 0 :(得分:5)

尽管用fgets逐行读取很有吸引力(用feof控制循环的写法是错误的),但问题中给出的只是少数几个城市的示例:实际上也许有10000个城市,一行会非常长。因此我逐个单词读取,并假设任何城市名称的长度都小于64(这只是输入缓冲区的限制);为每个名称实际分配的内存则按其真实长度计算。

行数和列数相同,因此没有必要分别定义:实际上我只定义了城市的数量。城市名称(横向表头与纵向行首相同)和距离分别使用单独的数组存放。

为了简单起见,我做了错误检查,但出错时不输出任何消息,直接中止程序。不过,如果城市名称由多个单词组成(例如Los Angeles),代码就需要修改,因为%s遇到任何空白字符都会停止。这时您需要换一种方法,或者在城市名称中用下划线代替空格(如Los_Angeles)。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define cities 5

/*
 * Read the distance table in dist.txt and print it back in aligned columns.
 *
 * Expected input: a header line with one heading word followed by `cities`
 * city names, then `cities` lines each holding a city name followed by
 * `cities` entries. Every word must be shorter than 64 characters and must
 * not contain white space.
 *
 * Entries are converted with atoi(), so a non-numeric entry such as "-" is
 * stored as 0; every 0 is printed back as "-".
 *
 * Any open, read or allocation failure ends the program with exit status 1
 * and no message.
 */
int main(void){

    FILE *nansa;
    char buffer[64];                        // scratch space for one word
    char distname[64];                      // heading of the first column
    char *city[cities];                     // city names
    int *dist[cities];                      // distance array
    int i, j, len, wid = 0;

    if((nansa = fopen("dist.txt","r")) == NULL)
        exit(1);                            // file open fault

    // read the headings
    if(fscanf(nansa, "%63s", distname) != 1)    // read the word for "distance"
        exit(1);                            // fscanf fault

    for(i=0; i<cities; i++) {               // read the city names
        if(fscanf(nansa, "%63s", buffer) != 1)
            exit(1);                        // fscanf fault
        len = strlen(buffer) + 1;
        if (wid < len)
            wid = len;                      // column width = longest name + 1
        if((city[i] = malloc(len)) == NULL) // memory for city name
            exit(1);                        // malloc fault
        strcpy(city[i], buffer);
    }

    // read the data
    for(j=0; j<cities; j++) {               // read each table line
        if((dist[j] = malloc(cities * sizeof(int))) == NULL)    // memory for distance chart
            exit(1);                        // malloc fault
        // skip the city name; the width limit keeps a long word from
        // overflowing buffer, as in every other read
        if(fscanf(nansa, "%63s", buffer) != 1)
            exit(1);                        // fscanf fault
        for(i=0; i<cities; i++) {           // read each entry of the line
            if(fscanf(nansa, "%63s", buffer) != 1)  // read the distance
                exit(1);                    // fscanf fault
            dist[j][i] = atoi(buffer);      // "-" converts to 0
        }
    }

    fclose(nansa);

    // display the table headings
    printf("%-*s", wid, distname);          // use the terminology in the file
    for(i=0; i<cities; i++)                 // each city name
        printf("%-*s", wid, city[i]);
    printf("\n");

    // display each line
    for(j=0; j<cities; j++) {
        printf("%-*s", wid, city[j]);       // start with city name
        for(i=0; i<cities; i++) {           // each table data
            if(dist[j][i])
                printf("%-*d", wid, dist[j][i]);
            else
                printf("%-*c", wid, '-');   // 0 stands for "no distance"
        }
        printf("\n");

    }

    // free the memory
    for(i=0; i<cities; i++) {
        free (city[i]);
        free (dist[i]);
    }
    return 0;
}

程序输出:

Distance  Amsterdam Antwerp   Athens    Barcelona Berlin
Amsterdam -         160       3082      1639      649
Antwerp   160       -         2766      1465      723
Athens    3082      2766      -         3312      2552
Barcelona 1639      1465      3312      -         1899
Berlin    649       723       2552      1899      -

答案 1 :(得分:3)

这似乎是那种会引出许多"从头重写"式新解决方案的问题。下面这个方案允许任意数量的城市(不超过某个上限),并强制要求每行行首的城市名称与表头一致。

核心是一个自制函数getcell,它与scanf("%s", ...)类似,但在读到一个或多个换行符时会返回一个特殊值。这样无需读入整行(一行可能很长)就能直接知道当前所在的行和列。

因为函数直接从文件中读取,并且必须区分空白和标记(token),所以第一个不匹配的字符会被读走。为了把它放回去,使用了ungetc,但每次最多只回退一个字符。我不认为这是特别好的风格,但还是保留了原样。(对字符串和指针这样做毫不费力,对文件则不然。)

读取距离的代码对行、列和城市之间的一致性做了(也许有些过分的)检查,但为了不使代码显得杂乱,省略了对文件I/O和内存分配的检查。

城市名称必须是单个单词(例如LeMans、Los_Angeles),并存储在单独的固定大小的数组中(固定大小正是存在最大城市数量的原因)。距离存储在动态分配的double数组中。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAX_CITY 256        // Max. number of cities
#define MAX_NAME 24         // Buffer allocated for a name
#define NEWLINE (-2)        // Special token: end of line was read

/*
 *      Short-cut macro for string comparison
 */
#define is(a, b) (strcmp(a, b) == 0)

/*
 *      Quick-and-dirty exit macro with message
 */
#define die(...) exit((printf(__VA_ARGS__), putchar('\n'), 1))



/*
 *      Read one white-space delimited cell from f into buf and return the
 *      number of characters stored.
 *
 *      At most (max - 1) characters are stored, followed by a terminating
 *      '\0'; the rest of an over-long cell is read and discarded. When max
 *      is 0 nothing at all is written to buf and 0 is returned.
 *
 *      When the end of input is reached before any cell, return the special
 *      value EOF; when one or more new-line characters are read before the
 *      next cell (or before the end of input), return the special value
 *      NEWLINE. On EOF and NEWLINE, buf is not touched.
 */
int getcell(FILE *f, char *buf, size_t max)
{
    size_t len = 0;
    int nl = 0;
    int c;

    /*
     *      Skip leading whitespace and account for newlines
     */
    for (;;) {
        c = fgetc(f);

        if (c == EOF) {
            if (nl) break;
            return EOF;
        }
        if (!isspace(c)) break;
        if (c == '\n') nl++;
    }

    /*
     *      Give back the character that ended the whitespace run, so that
     *      the next call starts with the first character of the cell.
     */
    if (c != EOF) ungetc(c, f);
    if (nl) return NEWLINE;

    /*
     *      Store the token proper
     */
    for (;;) {
        c = fgetc(f);

        if (c == EOF || isspace(c)) break;
        if (len + 1 < max) buf[len++] = (char) c;
    }

    /*
     *      The delimiter goes back as well: it may be a new-line that the
     *      next call has to report.
     */
    if (c != EOF) ungetc(c, f);
    if (max > 0) buf[len] = '\0';       // never write into a zero-sized buffer

    return (int) len;
}

/*
 *      Read the distance table from dist.txt, check it for consistency and
 *      print it as comma-separated values.
 *
 *      The first line must start with the word "Distance" followed by the
 *      city names (at most MAX_CITY). Every following line must start with
 *      the matching city name, followed by one entry per city; the entry
 *      for the city itself must be "-" and is stored as 0.0. Leading blank
 *      lines are ignored and the last line need not end in a new-line.
 *
 *      Any violation ends the program through die() with a message.
 */
int main(void)
{
    FILE *f = fopen("dist.txt", "r");
    int nrow = -1;          // current data row; -1 while reading the header
    int ncol = -1;          // entries read on this line; -1 at start of line

    char city[MAX_CITY][MAX_NAME];
    int ncity = 0;

    double *data = NULL;    // contiguous data block
    double **dist = NULL;   // Pointers into that block

    if (f == NULL) die("Cannot open dist.txt");

    for (;;) {
        char buf[MAX_NAME];
        int len = getcell(f, buf, sizeof(buf));

        if (len == EOF || len == NEWLINE) {
            /*
             *      Close the current line. ncol < 0 means no cell has been
             *      read since the last line end (leading blank lines, or
             *      EOF right after a new-line), so there is nothing to close.
             */
            if (ncol >= 0) {
                if (nrow >= 0 && ncol < ncity) {
                    die("Insufficient data for %s.", city[nrow]);
                }

                nrow++;
                ncol = -1;
            }

            if (len == EOF) break;
            continue;
        }

        if (nrow < 0) {
            if (ncol < 0) {
                if (!is(buf, "Distance")) die("Wrong file format");
            } else {
                if (ncity >= MAX_CITY) {
                    die("Can have at most %d cities", MAX_CITY);
                }
                strcpy(city[ncity++], buf);
            }

            ncol++;
            continue;
        }

        if (ncol < 0) {
            /*
             *      Valid rows are 0 .. ncity - 1; check before city[nrow]
             *      is used below.
             */
            if (nrow >= ncity) {
                die("Too many rows, expected only %d.", ncity);
            }

            if (!is(buf, city[nrow])) {
                die("Expected '%s' in row %d.", city[nrow], nrow);
            }

            if (nrow == 0) {
                // First-touch allocation
                data = malloc((size_t) ncity * ncity * sizeof(*data));
                dist = malloc((size_t) ncity * sizeof(*dist));
                if (data == NULL || dist == NULL) die("Out of memory");

                for (int i = 0; i < ncity; i++) {
                    dist[i] = &data[i * ncity];
                }
            }
        } else {
            if (ncol >= ncity) {
                die("Too many columns for %s.", city[nrow]);
            }

            if (nrow == ncol) {
                if (!is(buf, "-")) {
                    die("Distance of %s to itself isn't '-'.", city[nrow]);
                }

                dist[nrow][ncol] = 0.0;
            } else {
                dist[nrow][ncol] = strtod(buf, NULL);
            }
        }

        ncol++;
    }

    fclose(f);

    if (nrow < 0) die("Wrong file format");
    if (nrow < ncity) die("Got only %d rows, expected %d.", nrow, ncity);

    /*
     *      Print distance matrix
     */

    printf("Distance");
    for (ncol = 0; ncol < ncity; ncol++) {
        printf(", %s", city[ncol]);
    }
    puts("");

    for (nrow = 0; nrow < ncity; nrow++) {
        printf("%s", city[nrow]);

        for (ncol = 0; ncol < ncity; ncol++) {
            printf(", %g", dist[nrow][ncol]);
        }
        puts("");
    }

    free(dist);
    free(data);

    return 0;
}

答案 2 :(得分:2)

我认为解决办法是完全忽略标题行,从每一行中提取城市名称,然后在找到第一个数字或-之后,用strtod()开始扫描距离。我刚写的这个解决方案还远未完善:它需要更好的结构(使用函数会有帮助)和合理性检查(例如,每行的列数不一定相同)。但我认为它会把你引向正确的方向。

为什么忽略标题行?因为不清楚哪个字符将城市名称与另一个城市名称分开,而城市名称中通常包含空格,例如“洛杉矶”。无论分隔符和城市名称是否包含空格,此方法都将起作用。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stddef.h>

/*
 * Parse the distance table in dist.txt and print it with '|' separators.
 *
 * The header line is skipped. On every other line the city name is the text
 * before the first digit or '-' (trailing white space removed), so names may
 * contain blanks but neither digits nor '-'. The rest of the line is scanned
 * with strtod(); a token that is not a number (such as "-") is stored as
 * -1.0. Negative values and missing columns are printed as "-". Lines without
 * a city name are ignored.
 *
 * Returns 0 on success, -1 if the file cannot be opened or read, or if memory
 * runs out.
 */
int main(void)
{
    FILE *nansa;
    char buffer[1024];
    char **cities = NULL;       // city name of every data row
    double **distances = NULL;  // distances[i] holds the values of row i
    size_t *counts = NULL;      // counts[i] is the number of values in row i
    size_t rows = 0;
    int status = -1;

    nansa = fopen("dist.txt", "r");
    if (nansa == NULL)
        return -1;
    if (fgets(buffer, sizeof(buffer), nansa) == NULL)
        goto cleanup; // Skip the header line.
    while (fgets(buffer, sizeof(buffer), nansa) != NULL)
    {
        char *pointer = buffer;
        void *aux;
        size_t length;
        size_t row;

        // Find the end of the city name, never running past the terminator.
        while ((*pointer != '\0') && (*pointer != '-') && (isdigit((unsigned char) *pointer) == 0))
            ++pointer;
        length = (size_t) (pointer - buffer);
        // Remove trailing spaces
        while ((length > 0) && (isspace((unsigned char) buffer[length - 1]) != 0))
            --length;
        if (length == 0)
            continue; // blank line, or a line that starts with a number

        aux = realloc(cities, (rows + 1) * sizeof(*cities));
        if (aux == NULL)
            goto cleanup; // allocation error ABORT
        cities = aux;
        aux = realloc(distances, (rows + 1) * sizeof(*distances));
        if (aux == NULL)
            goto cleanup;
        distances = aux;
        aux = realloc(counts, (rows + 1) * sizeof(*counts));
        if (aux == NULL)
            goto cleanup;
        counts = aux;

        row = rows;
        cities[row] = malloc(length + 1);
        if (cities[row] == NULL)
            goto cleanup; // allocation error ABORT
        memcpy(cities[row], buffer, length);
        cities[row][length] = '\0';
        distances[row] = NULL;
        counts[row] = 0;
        rows += 1; // from here on the cleanup code owns this row

        for (;;)
        {
            char *endptr;
            double value;

            while (isspace((unsigned char) *pointer) != 0)
                ++pointer;
            if (*pointer == '\0')
                break;
            value = strtod(pointer, &endptr);
            if (endptr == pointer)
            {
                // Not a number (e.g. "-"): mark as invalid, skip the token.
                value = -1.0;
                while ((*endptr != '\0') && (isspace((unsigned char) *endptr) == 0))
                    ++endptr;
            }
            aux = realloc(distances[row], (counts[row] + 1) * sizeof(**distances));
            if (aux == NULL)
                goto cleanup;
            distances[row] = aux;
            distances[row][counts[row]] = value;
            counts[row] += 1;
            pointer = endptr;
        }
    }

    fprintf(stdout, "%-15s|", "Distance");
    for (size_t i = 0 ; i < rows ; ++i)
        fprintf(stdout, " %-14s|", cities[i]);
    fputc('\n', stdout);
    for (size_t i = 0 ; i < rows ; ++i)
    {
        fprintf(stdout, "%-15s|", cities[i]);
        for (size_t j = 0 ; j < rows ; ++j)
        {
            // A short row has no value here; treat it like an invalid one.
            if ((j >= counts[i]) || (distances[i][j] < 0.0))
                fprintf(stdout, "%15s|", "-");
            else
                fprintf(stdout, "%15.2f|", distances[i][j]);
        }
        fputc('\n', stdout);
    }
    status = 0;

cleanup:
    for (size_t i = 0 ; i < rows ; ++i)
    {
        free(distances[i]);
        free(cities[i]);
    }
    free(counts);
    free(distances);
    free(cities);

    fclose(nansa);
    return status;
}

答案 3 :(得分:1)

首先,你的代码中有一处错误的free调用,如下所示:

/* Print the matrix one row per output line, then release each row exactly
   once, after its last column has been printed. */
for(i=0; i<rows; i++)
{
    for(j=0; j<cols; j++)
    {
        printf("%g%s", mat[i][j], j<cols-1 ? "\t" : "\n");
        /* free(mat[i]) must not be called here: inside the column loop it would run once per column and free the same row repeatedly (double free) */ 
    }
    free(mat[i]);   /* correct place: once per row */
}
free(mat);          /* finally release the array of row pointers */

我已更新您的代码以显示所有需要的数据:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>

#define rows 6
#define cols 6

/*
 * Reads dist.txt into a rows x cols matrix of doubles, remembers the first
 * word of every line as a name, and prints the table in aligned columns.
 *
 * The first word of each line is copied into cities[]; every word (the name
 * included) is also converted with atof(), so non-numeric words such as the
 * names and "-" become 0 in the matrix. Row 0 and column 0 therefore hold no
 * distances and are skipped when the table is printed. The heading line is
 * rebuilt from cities[0..cols-1], which is only right when the table is
 * square (rows == cols, row i and column i naming the same city).
 *
 * Blank lines are skipped; lines after the first `rows` non-blank ones and
 * fields after the first `cols` are ignored.
 *
 * Returns 0 on success, -1 if memory cannot be allocated or the file cannot
 * be opened.
 */
int main(void)
{
    FILE *nansa = NULL;
    char buffer[4096];
    int row=0,i,j,len=0,maxlen=0;
    int status=-1;

    char delims[]=" \t\r\n";
    char *result=NULL;

    double **mat=malloc( rows*sizeof(*mat) );
    char **cities=malloc( rows*sizeof(*cities) );
    if (mat==NULL || cities==NULL)
    {
        free(mat);
        free(cities);
        fprintf(stdout, "Error\n");
        return -1;
    }
    for(i=0; i<rows; i++)
    {
        mat[i]=NULL;        /* lets the cleanup code free every slot safely */
        cities[i]=NULL;
    }
    for(i=0; i<rows; i++)
    {
        mat[i]=malloc( cols*sizeof(*mat[i]) );
        /* as large as the line buffer, so any token fits with its '\0' */
        cities[i]=malloc( sizeof buffer );
        if (mat[i]==NULL || cities[i]==NULL)
        {
            fprintf(stdout, "Error\n");
            goto cleanup;
        }
        cities[i][0]='\0';  /* rows missing from the file print as empty */
        for(j=0; j<cols; j++)
        {
            mat[i][j]=0.0;
        }
    }

    if ((nansa=fopen("dist.txt","r"))==NULL)
    {
        fprintf(stdout, "Error\n");
        goto cleanup;
    }

    /* Test the read itself instead of feof(), and stop after `rows` lines
       so a longer file cannot write past the arrays. */
    while(row<rows && fgets(buffer, sizeof buffer, nansa)!=NULL)
    {
        int field=0;
        result=strtok(buffer,delims);
        if(result==NULL)
        {
            continue;       /* blank line */
        }
        while(result!=NULL && field<cols)
        {
            if(field == 0)
            {
                strcpy(cities[row], result);
                len=(int)strlen(result);
                if(len>maxlen)
                    maxlen=len;
            }
            mat[row][field]=atof(result);
            result=strtok(NULL,delims);
            field++;
        }
        ++row;
    }
    fclose(nansa);

    for(i=0; i<cols; i++)
    {
        printf("%-*s%s", maxlen, cities[i], (i<cols-1) ? " " : "\n");
    }
    for(i=1; i<rows; i++)
    {
        printf("%-*s ", maxlen, cities[i]);
        for(j=1; j<cols; j++)
        {
            printf("%-*g%s", maxlen, mat[i][j], (j<cols-1) ? " " : "\n");
        }
    }
    status=0;

cleanup:
    for(i=0; i<rows; i++)
    {
        free(cities[i]);
        free(mat[i]);
    }
    free(cities);           /* the pointer array itself was leaked before */
    free(mat);
    return status;
}

结果将是这样的:

Distance  Amsterdam Antwerp   Athens    Barcelona Berlin   
Amsterdam 0         160       3082      1639      649      
Antwerp   160       0         2766      1465      723      
Athens    3082      2766      0         3312      2552     
Barcelona 1639      1465      3312      0         1899     
Berlin    649       723       2552      1899      0        

答案 4 :(得分:1)

...我不知道如何分隔名称和距离(字符和整数)......

这里只关注文件读取、数据解析和数据存储……

弄清文本文件的特点,是确定如何把数据解析(parsing)到变量中的重要一步。

您的文本文件具有以下特点:

  • 第一行是标题
  • 每个附加行包含数据(城市和距离)
  • 城市仅在第一列(仅限非数字(字符串))
  • 其余列包含距离(数字和非数字)

虽然数据全部存储在文本文件中,并且最初作为字符串读取,但您表示您希望将它们存储为字符串和数字。城市名称是字符串,距离是整数。但距离部分还包含非数字数据:“ - ”。

可以使用struct来存储多种数据类型。以下代码说明了如何使用结构分析然后分别存储数字和字符串。

注意 :以下示例旨在说明如何从文本文件中分隔名称和距离。错误检查/处理是最小的。

我没有打印数据,而是在下面附上一张内存视图的截图,展示数据在struct数组中的存储方式(以回应您"将数据保存在C中的矩阵中"的要求)。

/*
 * Column indices for the distance table. MAX_CITY comes last, so its value
 * (5) equals the number of enumerators before it and is used as the array
 * size below.
 * NOTE(review): AM..BE are presumably abbreviations of the cities in
 * dist.txt (Amsterdam, Antwerp, Athens, Barcelona, Berlin) - confirm.
 */
enum {
    AM,
    AN,
    AT,
    BA,
    BE,
    MAX_CITY
};

/* One table row: a name string together with MAX_CITY integer entries. */
typedef struct {// lets one record hold both string and numeric data
    char city[20];          // name; room for 19 characters plus '\0'
    int dist[MAX_CITY];     // one integer per column
}DIST;

DIST dist[MAX_CITY];// file-scope array of MAX_CITY records, zero-initialized, for storing results

int main(void)
{
    int i;
    FILE *fp = {0};
    char *tok = {0};
    char line[260];
    fp = fopen(".\\dist.txt", "r");
    if(fp)
    {
        i = 0;
        fgets(line, 260, fp); //consume first line - header information
        while(fgets(line, 260, fp))
        {
            tok = strtok(line, " \t\n");
            if(tok)
            {
                strcpy(dist[i].city, tok);//get city    
            }
            tok = strtok(NULL, " \t\n");
            if(tok)
            {
                if(strstr(tok, "-")) dist[i].dist[0] = 0;
                else dist[i].dist[0] = atoi(tok);//get city 1 dist  
            }
            tok = strtok(NULL, " \t\n");
            if(tok)
            {
                if(strstr(tok, "-")) dist[i].dist[1] = 0;
                else dist[i].dist[1] = atoi(tok);//get city 2 dist  
            }
            tok = strtok(NULL, " \t\n");
            if(tok)
            {
                if(strstr(tok, "-")) dist[i].dist[2] = 0;
                else dist[i].dist[2] = atoi(tok);//get city 3 dist  
            }
            tok = strtok(NULL, " \t\n");
            if(tok)
            {
                if(strstr(tok, "-")) dist[i].dist[3] = 0;
                else dist[i].dist[3] = atoi(tok);//get city 4 dist  
            }
            tok = strtok(NULL, " \t\n");
            if(tok)
            {
                if(strstr(tok, "-")) dist[i].dist[4] = 0;
                else dist[i].dist[4] = atoi(tok);//get city 5 dist  
            }
            i++;
        }
        fclose(fp);
    }

    return 0;
}

结果摘录 (执行后的结构内容)

enter image description here

答案 5 :(得分:-2)

请查看strtok()的用法!

您可以用strtok()把它们分开。先把所有内容都当作字符串读入,然后用减去'0'的方式(- '0')把距离中的数字字符转换为int。