从文件读取到动态结构

时间:2014-12-13 02:09:31

标签: c file linked-list

我想逐行读取文件。每行保证有3个字段:前两个是名和姓,第三个是年龄。 我想创建一个链表,其中每个节点代表文件中的一个人(一行)。 我不知道名字的长度,所以我想让它动态分配。我也不知道文件中的行数,所以我希望这一点也是动态的。

我的方法是使用fscanf,但是在阅读之前我不知道需要分配多少内存。 函数convertToList应该接收我们想要读取的文件的文件路径,将其转换为链表,然后返回头节点。 (开放改进)

查看我的代码并查看我遇到的问题:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hand-rolled boolean type: FALSE evaluates to 0 and TRUE to 1. */
typedef enum { FALSE = 0, TRUE = 1 } bool;

/* One person record, stored as a node of a singly linked list. */
struct Node{
    char firstName[50];   /* first name; fixed buffer of 50 bytes including the NUL */
    char lastName[50];    /* last name; fixed buffer of 50 bytes including the NUL */
    int age;              /* age */
    struct Node *next;    /* following node in the list */
};

/* Wrapper holding the head pointer of a Node list. */
typedef struct {
    struct Node *head;   /* first node of the list */
}LinkedList;


/* Reads the file at inputFilePath into a linked list and returns its head node. */
struct Node * convertToList(char *inputFilePath);

/*
 * Program entry point.
 *
 * Expects exactly three arguments: the input file path, the output file
 * path and a one-character sort type. Loads the input file into a linked
 * list, prints the first entry and waits for a key press.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on bad arguments or when
 * the input file yields no entry.
 */
int main(int argc, char* argv[]) {

    if(argc != 4) {
        printf("Invalid arguments.\n");
        exit(EXIT_FAILURE);
    }
    if (strlen(argv[3])!=1) {
        printf("Invalid sorting type.\n");
        exit(EXIT_FAILURE);
    }

    /*
     * The argv strings stay valid for the whole run of the program, so they
     * are used directly. The previous hand-made copies were over-allocated
     * (sizeof(char*) per character) and the first one was copied without
     * its terminating NUL, which made the path passed to fopen() invalid.
     */
    char *inputFilePath = argv[1];
    char *outputFilePath = argv[2];
    char *sortType = argv[3];

    /* Not used yet: writing the output file and sorting are still to do. */
    (void)outputFilePath;
    (void)sortType;

    /* convertToList() allocates the nodes itself; nothing to allocate here. */
    struct Node *head = convertToList(inputFilePath);
    if (head == NULL) {
        printf("No entries found.\n");
        exit(EXIT_FAILURE);
    }

    printf("\n%s %s %d\n", head->firstName, head->lastName, head->age);

    getchar();

    return 0;
}

struct Node * convertToList(char *inputFilePath) {
FILE* ifp;
ifp = fopen(inputFilePath, "r");
if (!ifp) { perror("fopen"); exit(0); }
struct Node *head = NULL;
struct Node *prev = NULL;
bool isHead = TRUE;
while(!feof(ifp))   {
    struct Node *tmp = (struct Node*)malloc(sizeof(struct Node));
    if (prev != NULL)
        prev->next = tmp;


    if (head==NULL) 
        head = tmp;

    fscanf(ifp, "%s %s %d\n", tmp->firstName, tmp->lastName, &tmp->age);
    prev = tmp;

    //Need to link to next node as well

}

fclose(ifp);
return head;

}

我知道fscanf是错误的,但我不确定如何修复它。 另外,我如何返回根?我的方法会起作用吗? 最后,如何在列表中设置下一个节点?我不会在当前的循环中看到它发生。

感谢。

2 个答案:

答案 0 :(得分:1)

如果你需要把节点链接起来并使用动态存储,可以像下面这样做。这段代码我没有仔细推敲,不过应该可以正常工作。

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* One person record, stored as a node of a singly linked list. */
struct Node
{
    char *firstName;     /* heap-allocated first name, or NULL if missing */
    char *lastName;      /* heap-allocated last name, or NULL if missing */
    int   age;           /* age, or -1 when missing or not a number */
    struct Node *next;   /* following node, or NULL at the end of the list */
};

/* Builds a linked list from the file at inputFilePath; returns the head,
 * or NULL when the file cannot be opened or yields no node. */
struct Node *convertToList(const char *const inputFilePath);
/* Releases every node reachable from the given node, including the name strings. */
void freeList(struct Node *);

/*
 * Program entry point: takes the input file path as its only argument,
 * loads it into a linked list, prints every entry to stderr and releases
 * the list. Returns 1 on a wrong argument count, 0 otherwise.
 */
int main(int argc, char* argv[])
{
    struct Node *list;
    struct Node *cursor;

    if (argc != 2)
    {
        printf("Invalid arguments.\n");
        return 1;
    }

    list = convertToList(argv[1]);
    if (list == NULL)
        return 0;

    /* Example use of the list: print the values. Real processing goes here. */
    for (cursor = list; cursor != NULL; cursor = cursor->next)
        fprintf(stderr, "%s %s %d\n", cursor->firstName, cursor->lastName, cursor->age);

    freeList(list);
    return 0;
}

/*
 * Releases every node of the list starting at node, together with the
 * name strings each node owns. A NULL list is accepted.
 */
void freeList(struct Node *node)
{
    while (node != NULL)
    {
        /* Remember the successor before the node itself is released. */
        struct Node *following = node->next;

        /* free(NULL) is a no-op, so missing names need no special case. */
        free(node->firstName);
        free(node->lastName);
        free(node);

        node = following;
    }
}

/*
 * Appends one character to the heap buffer *buffer, which currently holds
 * length bytes, growing it by one byte with realloc().
 *
 * Returns the new length (length + 1) on success. Returns length unchanged,
 * leaving *buffer untouched, when buffer is NULL or the reallocation fails.
 */
size_t appendChar(char **buffer, char character, size_t length)
{
    char *grown;

    if (buffer != NULL)
    {
        grown = realloc(*buffer, length + 1);
        if (grown != NULL)
        {
            grown[length] = character;
            *buffer       = grown;
            length       += 1;
        }
    }

    return length;
}

/*
 * Parses one text line of the form "<first name> <last name> <age>" into a
 * newly allocated node. The line is modified in place by strtok().
 *
 * Fields may be separated by spaces or tabs; a trailing carriage return or
 * newline (e.g. from a CRLF file) is ignored.
 *
 * Returns NULL when line is NULL or memory runs out. Otherwise returns a
 * node whose next pointer is NULL. Fields missing from the line stay at
 * their defaults (NULL names, age -1), and an age that is not a plain
 * decimal number is stored as -1. Callers must therefore check the names
 * for NULL before using them. The caller owns the node and both strings.
 */
struct Node *parseFileLine(char *line)
{
    static const char delimiters[] = " \t\r\n";
    struct Node *node;
    char        *word;
    char        *endptr;

    if (line == NULL)
        return NULL;

    node = malloc(sizeof *node);
    if (node == NULL)
        return NULL;

    node->firstName = NULL;
    node->lastName  = NULL;
    node->age       = -1; /* an invalid value */
    node->next      = NULL;

    word = strtok(line, delimiters);
    if (word == NULL)
        return node;
    node->firstName = strdup(word);
    if (node->firstName == NULL)
        goto out_of_memory;

    word = strtok(NULL, delimiters);
    if (word == NULL)
        return node;
    node->lastName = strdup(word);
    if (node->lastName == NULL)
        goto out_of_memory;

    word = strtok(NULL, delimiters);
    if (word == NULL)
        return node;

    node->age = (int)strtol(word, &endptr, 10);
    if (*endptr != '\0')
        node->age = -1; /* trailing junk after the digits */

    return node;

out_of_memory:
    /* Do not hand back a node whose present field could not be copied. */
    free(node->firstName);
    free(node->lastName);
    free(node);
    return NULL;
}

/*
 * Reads the next non-empty line from file, one character at a time, and
 * parses it into a node with parseFileLine().
 *
 * Blank lines are skipped, and a final line without a trailing newline is
 * still parsed rather than discarded.
 *
 * Returns the parsed node, or NULL at end of file or when memory for the
 * line buffer runs out.
 */
struct Node *getNode(FILE *file)
{
    struct Node *node;
    char        *line;
    int          character;
    size_t       length;
    size_t       grown;

    line   = NULL;
    length = 0;
    while ((character = fgetc(file)) != EOF)
    {
        if (character == '\n')
        {
            /* Nothing collected yet: an empty line, keep looking. */
            if (line == NULL)
                continue;
            break;
        }

        grown = appendChar(&line, (char)character, length);
        if (grown == length)
        {
            /* appendChar() could not grow the buffer. */
            free(line);
            return NULL;
        }
        length = grown;
    }

    /* End of file reached with no pending text. */
    if (line == NULL)
        return NULL;

    /* Terminate the collected text before handing it to the parser. */
    grown = appendChar(&line, '\0', length);
    if (grown == length)
    {
        free(line);
        return NULL;
    }

    node = parseFileLine(line);
    free(line);

    return node;
}

/*
 * Opens the file at inputFilePath and builds a singly linked list with one
 * node per call to getNode(), in file order.
 *
 * Returns the head of the list, or NULL when the file cannot be opened
 * (after reporting the error with perror) or when no node was read.
 * The caller releases the list with freeList().
 */
struct Node *convertToList(const char *const inputFilePath)
{
    FILE        *ifp;
    struct Node *head;
    struct Node *last;
    struct Node *current;

    ifp = fopen(inputFilePath, "r");
    if (ifp == NULL)
    {
        perror("fopen");
        return NULL;
    }

    head = NULL;
    last = NULL;
    /* getNode() returns NULL once the input is exhausted. */
    while ((current = getNode(ifp)) != NULL)
    {
        if (head == NULL)
            head = current;
        else
            last->next = current;
        last = current;
    }
    fclose(ifp);

    return head;
}

在这里,您还可以打印节点以查看数据是否正确。

我认为您还不太理解malloc的用途,对指针也不够熟悉:您在fscanf中把数据存入firstName和lastName,却没有为它们分配内存,它们甚至没有被初始化,所以会导致分段错误(segmentation fault)。

答案 1 :(得分:-1)

有点不同的方法。

argv复制

首先,如上所述,您无需复制argv的值。需要复制的主要理由是您打算修改这些值。另一种情况是需要抹掉argv的值,因为它们可以被ps等工具读取(例如通过/proc/)。例如,某些程序把密码作为参数接收;为了防止任何能访问系统的人读到密码,通常会先复制参数,然后覆盖argv中的值。

然而,通常使用变量作为参数是一种好习惯。它通常使代码更清晰,但如果更改代码也更容易维护。例如。实现像-f <filename>这样的标志参数。

exit() 与从 main() 返回

您在出错时也以0调用exit()。通常应当在成功时以0退出,在出错或其他情况下以非零值退出。这是惯例:0 == 成功。某些应用程序用不同的数字退出码表示不同含义。例如:0是正常退出,1不是错误而是某种特殊情况,2、3等则可能表示错误。以grep为例:

EXIT STATUS
   The exit status is 0 if selected lines are found, and 1 if not found.  If  an
   error occurred the exit status is 2.  (Note: POSIX error handling code should
   check for '2' or greater.)

scanf的

当您使用scanf读取字符串时,可以使用一些技巧来使其更好。首先,请始终使用size参数。

char name[16]
sscanf(buf, "%15s", name);

还要检查读取的项目:

if (sscanf(buf, "%15s %d", name, &age) != 2)
     ... error ...

第三,您还可以用%n保存到该位置为止已读取的字节数:

/* Each %n needs its own int* argument: four %n conversions, four offsets. */
sscanf(buf, "%n%15s%n %n%d%n", &of1, name, &of2, &of3, &age, &of4);

用法

一个非常简单但又快速且用户友好的东西,就是添加一个使用功能。

典型地:

/*
 * Prints the command-line synopsis to stderr, followed by err_str as an
 * error line when err_str is not NULL. self is the program name shown in
 * the synopsis. Always returns ERR_ARG so callers can `return usage(...)`.
 */
int usage(const char *self, const char *err_str)
{
    fprintf(stderr, "Usage: %s <in-file> <out-file> <sort-type>\n", self);
    fprintf(stderr,
            "  Sort types:\n"
            "   f Sort by First Name\n"
            "   l Sort by Last Name\n"
            "   a Sort by Age\n");

    if (err_str != NULL)
        fprintf(stderr, "\nError: %s\n", err_str);

    return ERR_ARG;
}

然后在main()中,您可以快速清理添加类似的内容:

if (argc < 4)
    return usage(argv[0], "Missing arguments.");

关于验证sort参数的说明:您可以检查第二个字节是否为'\0'(同时确认第一个字节不是'\0'),而不必调用strlen()。

/* Test byte 0 first: for an empty argument, argv[3][1] lies past the NUL. */
if (argv[3][0] == '\0' || argv[3][1] != '\0')
    ... error ...

最后主要可能是:

/*
 * Program entry point.
 *
 * Expects <in-file> <out-file> <sort-type>, where the sort type is a single
 * character: 'f', 'l' or 'a'. Loads the input file into a linked list,
 * prints it to stdout and releases it.
 *
 * Returns the value of usage() on bad arguments, the non-zero result of
 * file_to_llist() on failure, and 0 on success.
 */
int main(int argc, char *argv[])
{
    char *in_file, *out_file, sort;
    struct Node *head = NULL;
    int err = 0;

    if (argc < 4)
        return usage(argv[0], "Missing arguments.");
    if (argc > 4)
        return usage(argv[0], "Unknown arguments.");
    /* Exactly one character. Byte 0 is tested first because for an empty
     * argument argv[3][1] would lie past the terminating NUL. */
    if (argv[3][0] == '\0' || argv[3][1] != '\0')
        return usage(argv[0], "Invalid sorting type.");

    in_file  = argv[1];
    out_file = argv[2];
    sort     = argv[3][0];

    (void)out_file; /* writing the output file is not implemented yet */

    if (sort != 'f' && sort != 'l' && sort != 'a')
        return usage(argv[0], "Invalid sorting type.");

    if ((err = file_to_llist(in_file, &head)) != 0)
        return err;

    prnt_llist(stdout, head);
    free_ll(head);

    return err;
}

malloc助手

当处理大量malloc和类似的东西时,添加一些辅助函数会很有用。如果您收到内存错误,通常会立即退出。

/*
 * malloc() wrapper that never returns NULL: when the allocation fails it
 * prints "Memory error." to stderr and exits the process with ERR_MEM.
 */
void *alloc(size_t size)
{
    void *mem = malloc(size);

    if (mem == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }

    return mem;
}

/*
 * realloc() wrapper that never returns NULL: when the reallocation fails it
 * prints "Memory error." to stderr and exits the process with ERR_MEM.
 * Returns the (possibly moved) block of the requested size.
 */
void *re_alloc(void *old, size_t size)
{
    void *resized = realloc(old, size);

    if (resized == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }

    return resized;
}

解析文件

由于您希望动态分配所有内容并且没有限制(超出系统内存),因此一种解决方案是实现某种令牌化程序。使用结构将它们组合在一起会很有帮助。类似的东西:

/* State of a token reader: the open file plus a growable buffer that holds
 * the most recently read token. */
struct file_toker {
    FILE *fh;     /* File handle */
    char *buf;    /* Dynamic Read buffer */
    size_t size;  /* Size of buffer */
    size_t len;   /* Length of actual data in buffer. */
};

这里的一点是保持令牌的长度。通过这个不需要继续使用strlen等。

如果你能负担得起,通常最好一次读取整个文件,然后解析缓冲区。可选地,人们可以以4096 * 16字节的块来读取文件,但是当读取等之间的重叠线时,会有一些复杂性。

无论如何在这个例子中一次读取一个字节。


开始代码

最后一个起点可能是这样的:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>     /* memcpy/strncpy */
#include <errno.h>      /* errno for fopen() */
#include <ctype.h>      /* isspace() */

/* Status codes used by this program. */
/* Returned by usage() for bad command-line arguments. */
#define ERR_ARG         1
/* Intended for malformed input files; not referenced by the code shown here. */
#define ERR_FILE_FMT    2
/* Exit status used by alloc()/re_alloc() when memory runs out. */
#define ERR_MEM         3

/* One person record, stored as a node of a singly linked list. */
struct Node {
    char *name_first;   /* heap-allocated first name (released by free_node) */
    char *name_last;    /* heap-allocated last name (released by free_node) */
    int age;            /* age */
    struct Node *next;  /* following node, or NULL at the end of the list */
};

/* State of a token reader: the open file plus a growable buffer that holds
 * the most recently read token. */
struct file_toker {
    FILE *fh;      /* file handle the tokens are read from */
    char *buf;     /* dynamically sized read buffer */
    size_t size;   /* allocated size of buf in bytes */
    size_t len;    /* length of the token currently in buf, without the NUL */
};

/* ===============----- GEN HELPERS ------=================== */

/*
 * Prints the command-line synopsis to stderr, followed by err_str as an
 * error line when err_str is not NULL. self is the program name shown in
 * the synopsis. Always returns ERR_ARG so callers can `return usage(...)`.
 */
int usage(const char *self, const char *err_str)
{
    fprintf(stderr, "Usage: %s <in-file> <out-file> <sort-type>\n", self);
    fprintf(stderr,
            "  Sort types:\n"
            "   f Sort by First Name\n"
            "   l Sort by Last Name\n"
            "   a Sort by Age\n");

    if (err_str != NULL)
        fprintf(stderr, "\nError: %s\n", err_str);

    return ERR_ARG;
}

/*
 * malloc() wrapper that never returns NULL: when the allocation fails it
 * prints "Memory error." to stderr and exits the process with ERR_MEM.
 */
void *alloc(size_t size)
{
    void *mem = malloc(size);

    if (mem == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }

    return mem;
}

/*
 * realloc() wrapper that never returns NULL: when the reallocation fails it
 * prints "Memory error." to stderr and exits the process with ERR_MEM.
 * Returns the (possibly moved) block of the requested size.
 */
void *re_alloc(void *old, size_t size)
{
    void *resized = realloc(old, size);

    if (resized == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }

    return resized;
}

/* ===============----- LINKED LIST ------=================== */

/* Releases a single node and the two name strings it owns. NULL is accepted. */
void free_node(struct Node *n)
{
    if (n != NULL) {
        /* free(NULL) is a no-op, so unset names need no special case. */
        free(n->name_first);
        free(n->name_last);
        free(n);
    }
}

/* Releases every node of the list starting at n. A NULL list is accepted. */
void free_ll(struct Node *n)
{
    while (n != NULL) {
        /* Fetch the successor before the current node is released. */
        struct Node *following = n->next;

        free_node(n);
        n = following;
    }
}


/*
 * Writes the list starting at n to the stream fd: a "NODELIST:" header
 * followed by one numbered block per node (numbering starts at 1) showing
 * "last name, first name" and the age.
 */
void prnt_llist(FILE *fd, struct Node *n)
{
    int entry_no = 1;

    fprintf(fd, "NODELIST:\n");

    while (n != NULL) {
        fprintf(fd,
                "Entry %d {\n"
                "  Name: %s, %s\n"
                "  Age : %d\n"
                "}\n",
                entry_no, n->name_last, n->name_first, n->age);
        entry_no++;
        n = n->next;
    }
}

/* ================--------- FILE TOKER ------------==================== */
/* Free / close reader. */
/*
 * Shuts down a token reader: closes its file if one is open, releases the
 * buffer and resets both pointers to NULL. The struct itself is not freed.
 * A NULL reader is accepted.
 */
void free_ft(struct file_toker *ft)
{
    if (ft == NULL)
        return;

    if (ft->fh != NULL) {
        fclose(ft->fh);
        ft->fh = NULL;
    }

    free(ft->buf);
    ft->buf = NULL;
}
/* Initiate reader. */
/*
 * Prepares the token reader ft: opens the file fn for reading and allocates
 * an initial buffer of buf_sz bytes.
 *
 * Returns 0 on success. If the file cannot be opened, reports the error
 * with perror and returns a non-zero value (the errno of the failed open);
 * in that case ft holds no file and no buffer, so nothing is leaked and
 * free_ft() remains safe to call.
 */
int ft_init(struct file_toker *ft, const char *fn, size_t buf_sz)
{
    ft->buf = NULL;
    ft->size = 0;
    ft->len = 0;

    /* Open first, so a failed open leaves no buffer behind. */
    ft->fh = fopen(fn, "r");
    if (!ft->fh) {
        /* Capture errno before perror(), which is allowed to change it. */
        int open_errno = errno;

        perror("Unable to open file");
        /* Never report success (0) for a failed open. */
        return open_errno != 0 ? open_errno : -1;
    }

    ft->buf = alloc(buf_sz);
    ft->size = buf_sz;

    return 0;
}
/* Increase buffer size. */
/*
 * Doubles the reader's buffer (a size of 0 is treated as 1 first, so the
 * result is at least 2 bytes) and returns the new size. Existing buffer
 * contents are kept by the underlying realloc.
 */
size_t ft_increase(struct file_toker *ft)
{
    size_t base = (ft->size < 1) ? 1 : ft->size;

    ft->size = base * 2;
    ft->buf = re_alloc(ft->buf, ft->size);

    return ft->size;
}
/* Read and skip spaces (\n, \r, ' ', \t etc.). Return first non-space. */
/*
 * Consumes whitespace characters (as classified by isspace) from the
 * reader's file and returns the first non-space character, or 0 when end
 * of file is reached first.
 */
char ft_skip_space(struct file_toker *ft)
{
    int c;

    for (;;) {
        c = fgetc(ft->fh);
        if (c == EOF)
            return 0;
        if (!isspace(c))
            return (char)c;
    }
}
/* Read next token */
/*
 * Reads the next whitespace-delimited token from the reader's file into
 * ft->buf, growing the buffer as needed, and NUL-terminates it.
 *
 * Returns the token length (also stored in ft->len), or 0 when no token is
 * left; in that case ft->len is 0 as well. The whitespace character that
 * ends the token is consumed.
 */
size_t file_tok(struct file_toker *ft)
{
    size_t used;
    size_t last_idx;
    char first;
    int c;

    /* Room is needed for at least one character plus the terminator. */
    if (ft->size < 2)
        ft_increase(ft);

    ft->len = 0;
    last_idx = ft->size - 1;

    /* The first character of the token is the first non-space found. */
    first = ft_skip_space(ft);
    ft->buf[0] = first;
    if (first == 0)
        return 0;

    used = 1;
    for (c = fgetc(ft->fh); c != EOF && !isspace(c); c = fgetc(ft->fh)) {
        ft->buf[used] = (char)c;
        used++;
        /* Buffer full: grow it so the terminator still fits. */
        if (used > last_idx)
            last_idx = ft_increase(ft) - 1;
    }

    ft->buf[used] = 0x00;
    /* Length excludes the terminating NUL. */
    ft->len = used;

    return used;
}
/* Read next space separated token and save it as new allocated string. */
/*
 * Reads the next token and stores a freshly allocated, NUL-terminated copy
 * of it in *out; the caller owns that copy.
 * Returns 0 on success, 1 when no token is left (*out is then untouched).
 */
int file_tok_str(struct file_toker *ft, char **out)
{
    size_t bytes;

    if (file_tok(ft) == 0)
        return 1;

    /* Token length plus the terminating NUL. */
    bytes = ft->len + 1;
    *out = alloc(bytes);
    memcpy(*out, ft->buf, bytes);

    return 0;
}
/* Read next space separated token and scan it as int. */
/*
 * Reads the next token and converts it to an int with sscanf("%d").
 * Returns 0 on success, 1 when no token is left or the token does not
 * start with an integer.
 */
int file_tok_int(struct file_toker *ft, int *out)
{
    if (file_tok(ft) == 0)
        return 1;

    return (sscanf(ft->buf, "%d", out) == 1) ? 0 : 1;
}

/* ===============----- FILE PARSER ------=================== */    
/*
 * Reads "<first name> <last name> <age>" records from the file fn and
 * appends one node per complete record to the list *head, in file order.
 * If *head is NULL it is set to the first node read; if the list already
 * has nodes, the new ones are appended after its last node.
 *
 * Reading stops at end of file or at the first incomplete or malformed
 * record; that partial record is discarded.
 *
 * Returns 0 once the file has been processed, 1 when the file cannot be
 * opened.
 */
int file_to_llist(const char *fn, struct Node **head)
{
    struct Node *node = NULL, *cur = *head;
    struct file_toker ft;

    /* Initiate new file token reader, initial buffer size 4096 bytes. */
    if (ft_init(&ft, fn, 4096)) {
        /* Release whatever the failed initialisation still holds. */
        free_ft(&ft);
        return 1;
    }

    /* Append after the last existing node, so that a non-empty list keeps
     * all of its nodes instead of having head->next overwritten. */
    while (cur != NULL && cur->next != NULL)
        cur = cur->next;

    while (1) {
        /* Allocate next node */
        node = alloc(sizeof(struct Node));
        node->name_first = NULL;
        node->name_last  = NULL;
        /* Read and copy first name. */
        if (file_tok_str(&ft, &node->name_first))
            break;
        /* Read and copy last name. */
        if (file_tok_str(&ft, &node->name_last))
            break;
        /* Read and copy age. */
        if (file_tok_int(&ft, &node->age))
            break;

        /* Link and save current for next iteration. */
        node->next = NULL;
        if (cur) {
            cur->next = node;
        }
        cur = node;
        if (*head == NULL)
            *head = node;
    }
    /* Free last unused node. */
    free_node(node);
    free_ft(&ft);

    return 0;
}

/* ===============----- MAIN ROUTINE ------=================== */
/*
 * Program entry point.
 *
 * Expects <in-file> <out-file> <sort-type>, where the sort type is a single
 * character: 'f', 'l' or 'a'. Loads the input file into a linked list,
 * prints it to stdout and releases it.
 *
 * Returns the value of usage() on bad arguments, the non-zero result of
 * file_to_llist() on failure, and 0 on success.
 */
int main(int argc, char *argv[])
{
    char *in_file, *out_file, sort;
    struct Node *head = NULL;
    int err = 0;

    if (argc < 4)
        return usage(argv[0], "Missing arguments.");
    if (argc > 4)
        return usage(argv[0], "Unknown arguments.");
    /* Exactly one character. Byte 0 is tested first because for an empty
     * argument argv[3][1] would lie past the terminating NUL. */
    if (argv[3][0] == '\0' || argv[3][1] != '\0')
        return usage(argv[0], "Invalid sorting type.");

    in_file  = argv[1];
    out_file = argv[2];
    sort     = argv[3][0];

    (void)out_file; /* writing the output file is not implemented yet */

    if (sort != 'f' && sort != 'l' && sort != 'a')
        return usage(argv[0], "Invalid sorting type.");

    if ((err = file_to_llist(in_file, &head)) != 0)
        return err;

    prnt_llist(stdout, head);
    free_ll(head);

    return err;
}