使用C中的sscanf从文件中读取多行字符串

时间:2015-07-26 01:04:51

标签: c string parsing

我有一个包含特定记录信息的文本文件,文件格式如下:

Title: Void
Artist: RL Grime
Year: 2014
Genre: Bass
Label: We Did It
Price: 14.95
----
Title: Mssingno EP
Artist: Mssingno
Year: 2013
Genre: Grime / Garage
Label: Goon Club Allstars
Price: 10.00
----

我正在尝试使用sscanf()编写一个可以解析这些数据的程序,然后将每组数据存储到一个名为Record的struct中。Record看起来像这样:

typedef struct { char title[80]; char artist[80]; int year; char genre[80]; char label[80]; double price; } Record; Record record_inventory[MAX_RECORDS];

但我想知道我该怎么做才能真正解析这些数据。我可以解析一行,但我不知道如何用sscanf()解析一整段。

例如,解析单行(我读取的文件是ifp)的代码如下所示:

char input[1024]; while(fgets(input, 1024, ifp)) { sscanf(input, "Title: %[^,]", record_inventory[0].title); }

但很明显,因为input只能保存一行,所以我无法再往sscanf里添加任何内容。除了在sscanf和fgets之间交替四次之外,有没有一种有效的方法可以先用fgets读取四行,然后再一起解析?

2 个答案:

答案 0 :(得分:2)

#include <stdio.h>
#include <string.h>

/* Number of elements in record_inventory. */
#define MAX_RECORDS 64

/*
 * One parsed record. main() fills the members from the values that follow
 * the "Title:", "Artist:", "Year:", "Genre:", "Label:" and "Price:" tags.
 */
typedef struct {
    char    title[80];   /* 80 bytes: the %79[^\n] conversion plus the NUL */
    char    artist[80];
    int     year;
    char    genre[80];
    char    label[80];
    double  price;
} Record;

/* Storage for the records parsed by main(). */
Record record_inventory[MAX_RECORDS];

/*
 * Read "data.txt", accumulate the lines of each record until a line that
 * starts with "----", then parse the accumulated text with one sscanf()
 * call and store the result in record_inventory.
 *
 * Returns 1 when the file cannot be opened, 0 otherwise. A record that
 * does not parse, or that does not fit in the accumulation buffer, is
 * reported as "bad format!" and skipped.
 */
int main(void){
    char input[128], rec[6*128] = "";
    size_t rec_len = 0;     /* strlen(rec), tracked to bound the appends */
    int overflow = 0;       /* set when the current record did not fit in rec */
    int n = 0;
    FILE *ifp = fopen("data.txt", "r");

    if(ifp == NULL){
        perror("data.txt");
        return 1;
    }

    while(fgets(input, sizeof input, ifp)){
        if(strncmp(input, "----", 4)==0){
            /* The white space in the format also matches the newlines
               that separate the accumulated lines. */
            if(overflow || 6!=sscanf(rec ,
                "Title: %79[^\n] "
                "Artist: %79[^\n] "
                "Year: %d "
                "Genre: %79[^\n] "
                "Label: %79[^\n] "
                "Price: %lf",
                record_inventory[n].title,
                record_inventory[n].artist,
                &record_inventory[n].year,
                record_inventory[n].genre,
                record_inventory[n].label,
                &record_inventory[n].price)){
                fprintf(stderr, "bad format!\n");
            } else if(++n == MAX_RECORDS){
                fprintf(stderr, "full.\n");
                break;
            }
            /* start accumulating the next record */
            *rec = '\0';
            rec_len = 0;
            overflow = 0;
        } else {
            /* append up to ----, but never write past the end of rec */
            size_t len = strlen(input);
            if(rec_len + len < sizeof rec){
                memcpy(rec + rec_len, input, len + 1);
                rec_len += len;
            } else {
                overflow = 1;
            }
        }
    }
    fclose(ifp);
    //check print
    for(int i = 0; i < n; ++i){
        printf("%s : %f\n", record_inventory[i].title, record_inventory[i].price);
    }
    return 0;
}

答案 1 :(得分:0)

您显示的数据整齐且一致——每条记录有6行数据和一个记录结束(EOR)标记,字段顺序也相同。目前尚不清楚是否可以放心地假设所有数据都如此规范。我们姑且假设确实如此。那么,您需要读取并累积各行直到遇到EOR,然后处理所得到的数据。

您没有说明您所在的平台是否支持POSIX getline()。我假设支持,因为这样会简单得多。如有必要,您也可以改用fgets()来实现。

您可以使用以下代码读取一条记录(一直读到EOR为止):

/* Return the larger of two sizes (y when they are equal). */
static size_t max(size_t x, size_t y)
{
    if (x > y)
        return x;
    return y;
}

/*
 * Read lines from fp with getline() and concatenate them into one
 * heap-allocated string, stopping after a line that consists of eor
 * immediately followed by a newline, or when getline() returns -1.
 *
 * Returns the accumulated text (the caller frees it), or a null pointer
 * when no line could be read or when realloc() fails.
 */
char *get_record(FILE *fp, const char *eor)
{
    char *line = 0;          /* line buffer owned by getline() */
    size_t line_cap = 0;
    char *record = 0;        /* accumulated text */
    size_t used = 0;         /* characters stored in record, NUL excluded */
    size_t capacity = 0;     /* bytes allocated for record */
    const size_t marker_len = strlen(eor);
    ssize_t got;

    for (;;)
    {
        got = getline(&line, &line_cap, fp);
        if (got == -1)
            break;

        const size_t needed = used + got + 1;
        if (needed >= capacity)
        {
            /* at least double, so repeated appends stay cheap */
            const size_t grown = max(used * 2, needed);
            void *bigger = realloc(record, grown);
            if (bigger == 0)
            {
                free(line);
                free(record);
                return 0;
            }
            record = bigger;
            capacity = grown;
        }

        /* copy the line together with its terminating NUL */
        memmove(record + used, line, got + 1);
        used += got;

        const int at_marker = strncmp(line, eor, marker_len) == 0 &&
                              line[marker_len] == '\n';
        if (at_marker)
            break;
    }

    free(line);
    return record;
}

/* Test harness for get_record(): echo every record read from stdin. */
int main(void)
{
    for (;;)
    {
        char *record = get_record(stdin, "----");
        if (record == 0)
            break;

        printf("[[%s]]\n", record);
        free(record);
    }

    return 0;
}

源文件getrec.c

然后可以将其扩展为从记录中处理结构,如下所示:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of elements in record_inventory. */
enum { MAX_RECORDS = 20 };
/* Size of the tag and format-string buffers used by the scan_*() helpers. */
enum { MAX_TAG = 20 };

/*
 * One parsed record. scan_record() fills the members from the "Title",
 * "Artist", "Year", "Genre", "Label" and "Price" fields, in that order.
 */
typedef struct
{
    char    title[80];
    char    artist[80];
    int     year;
    char    genre[80];
    char    label[80];
    double  price;
} Record;

/* Storage for the parsed records. */
static Record record_inventory[MAX_RECORDS];
/* Number of leading record_inventory slots in use; maintained by main(). */
static size_t n_rec = 0;

/* Return the larger of two sizes (y when they are equal). */
static size_t max(size_t x, size_t y)
{
    if (x > y)
        return x;
    return y;
}

/* Read one record's worth of lines from fp, up to and including the eor line. */
extern char *get_record(FILE *fp, const char *eor);
/* Parse the text of one record into *record; returns 0 on success, -1 on failure. */
extern int scan_record(const char *buffer, Record *record);
/* Print record number i and the fields of *record on standard output. */
extern void print_record(size_t i, const Record *record);

/*
 * Read lines from fp with getline() and concatenate them into one
 * heap-allocated string, stopping after a line that consists of eor
 * immediately followed by a newline, or when getline() returns -1.
 *
 * Returns the accumulated text (the caller frees it), or a null pointer
 * when no line could be read or when realloc() fails.
 */
char *get_record(FILE *fp, const char *eor)
{
    char *line = 0;          /* line buffer owned by getline() */
    size_t line_cap = 0;
    char *record = 0;        /* accumulated text */
    size_t used = 0;         /* characters stored in record, NUL excluded */
    size_t capacity = 0;     /* bytes allocated for record */
    const size_t marker_len = strlen(eor);
    ssize_t got;

    for (;;)
    {
        got = getline(&line, &line_cap, fp);
        if (got == -1)
            break;

        const size_t needed = used + got + 1;
        if (needed >= capacity)
        {
            /* at least double, so repeated appends stay cheap */
            const size_t grown = max(used * 2, needed);
            void *bigger = realloc(record, grown);
            if (bigger == 0)
            {
                free(line);
                free(record);
                return 0;
            }
            record = bigger;
            capacity = grown;
        }

        /* copy the line together with its terminating NUL */
        memmove(record + used, line, got + 1);
        used += got;

        const int at_marker = strncmp(line, eor, marker_len) == 0 &&
                              line[marker_len] == '\n';
        if (at_marker)
            break;
    }

    free(line);
    return record;
}

/*
 * Check that data starts (after optional white space) with "tag:".
 *
 * Returns the offset in data of the first character after the colon, or 0
 * when no colon follows the tag text, the text before the colon is longer
 * than MAX_TAG - 1 characters, or it differs from tag. The offset is never
 * 0 on success because at least one tag character and the colon were
 * consumed.
 *
 * The character after the colon is deliberately not skipped here: it may
 * be the first character of the value or the terminating NUL. The callers'
 * conversions skip leading white space themselves.
 */
static int scan_tag(const char *tag, const char *data)
{
    int pos = -1;   /* stays -1 unless sscanf() gets as far as %n */
    char fmtstr[MAX_TAG];
    char tagstr[MAX_TAG];

    snprintf(fmtstr, sizeof(fmtstr), " %%%d[^:]:%%n", MAX_TAG - 1);

    /* sscanf() returns 1 as soon as the tag text is stored, even when the
       literal ':' then fails to match; only pos shows the colon was seen. */
    if (sscanf(data, fmtstr, tagstr, &pos) != 1 || pos < 0)
        return 0;
    if (strcmp(tagstr, tag) != 0)
        return 0;
    return pos;
}

/*
 * Parse a "tag: text" field at the start of data and copy the text (up to
 * the next newline, at most buflen - 1 characters) into buffer.
 *
 * Returns the number of characters of data consumed, or 0 when the tag
 * does not match, no text follows it, or buflen is too small to hold even
 * one character plus the terminating NUL.
 */
static size_t scan_string(const char *tag, const char *data, char *buffer, size_t buflen)
{
    /* buflen - 1 is used as the field width: 0 would wrap around and 1
       would produce the invalid width 0. */
    if (buflen < 2)
        return 0;

    int pos1 = scan_tag(tag, data);
    if (pos1 == 0)
        return 0;

    /* " %" + up to 20 digits of size_t + "[^\n]%n" + NUL fits in 32 bytes;
       the snprintf() result is checked anyway. */
    char fmtstr[32];
    int fmtlen = snprintf(fmtstr, sizeof(fmtstr), " %%%zu[^\n]%%n", buflen - 1);
    if (fmtlen < 0 || (size_t)fmtlen >= sizeof(fmtstr))
        return 0;

    int pos2;
    if (sscanf(data + pos1, fmtstr, buffer, &pos2) != 1)
        return 0;
    return (size_t)(pos1 + pos2);
}

/*
 * Parse a "tag: integer" field at the start of data into *int_val.
 *
 * Returns the number of characters of data consumed, or 0 when the tag
 * does not match or no integer follows it.
 */
static size_t scan_integer(const char *tag, const char *data, int *int_val)
{
    /* offset returned by scan_tag(), from which the number is parsed */
    const int value_at = scan_tag(tag, data);
    int consumed;

    if (value_at != 0 && sscanf(data + value_at, "%d%n", int_val, &consumed) == 1)
        return (size_t)(value_at + consumed);
    return 0;
}

/*
 * Parse a "tag: number" field at the start of data into *dbl_val.
 *
 * Returns the number of characters of data consumed, or 0 when the tag
 * does not match or no floating-point number follows it.
 */
static size_t scan_double(const char *tag, const char *data, double *dbl_val)
{
    const int tag_len = scan_tag(tag, data);
    if (tag_len == 0)
        return 0;

    const char *value = data + tag_len;
    int value_len = 0;
    const int converted = sscanf(value, "%lf%n", dbl_val, &value_len);

    return (converted == 1) ? (size_t)(tag_len + value_len) : 0;
}

/*
 * Parse the text of one record into *record. The fields must appear in the
 * order Title, Artist, Year, Genre, Label, Price.
 *
 * Returns 0 when all six fields were parsed, -1 at the first field that
 * fails; members already parsed keep their new values.
 */
int scan_record(const char *buffer, Record *record)
{
    const char *cursor = buffer;    /* start of the next field to parse */
    size_t used;                    /* characters consumed by the last field */

    used = scan_string("Title", cursor, record->title, sizeof(record->title));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_string("Artist", cursor, record->artist, sizeof(record->artist));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_integer("Year", cursor, &record->year);
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_string("Genre", cursor, record->genre, sizeof(record->genre));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_string("Label", cursor, record->label, sizeof(record->label));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_double("Price", cursor, &record->price);
    return (used == 0) ? -1 : 0;
}

/*
 * Print record number i and every field of *record on standard output,
 * one field per line, followed by a blank line.
 */
void print_record(size_t i, const Record *record)
{
    printf("Record:  %zu\n"
           "Title:   %s\n"
           "Artist:  %s\n"
           "Year:    %4d\n"
           "Genre:   %s\n"
           "Label:   %s\n"
           "Price:   %.2f\n"
           "\n",
           i,
           record->title,
           record->artist,
           record->year,
           record->genre,
           record->label,
           record->price);
}

int main(void)
{
    char *buffer;
    int rc = 0;

    while ((buffer = get_record(stdin, "----")) != 0 && rc == 0)
    {
        printf("Input %zu: [[%s]]\n", n_rec, buffer);
        rc = scan_record(buffer, &record_inventory[n_rec++]);
        free(buffer);
    }

    for (size_t i = 0; i < n_rec; i++)
        print_record(i, &record_inventory[i]);

    return 0;
}

示例输入文件data

Title: Void
Artist: RL Grime
Year: 2014
Genre: Bass
Label: We Did It
Price: 14.95
----
Title: Mssingno EP
Artist: Mssingno
Year: 2013
Genre: Grime / Garage
Label: Goon Club Allstars
Price: 10.00
----

示例输出

$ ./getrec < data
Input 0: [[Title: Void
Artist: RL Grime
Year: 2014
Genre: Bass
Label: We Did It
Price: 14.95
----
]]
Input 1: [[Title: Mssingno EP
Artist: Mssingno
Year: 2013
Genre: Grime / Garage
Label: Goon Club Allstars
Price: 10.00
----
]]
Record:  0
Title:   Void
Artist:  RL Grime
Year:    2014
Genre:   Bass
Label:   We Did It
Price:   14.95

Record:  1
Title:   Mssingno EP
Artist:  Mssingno
Year:    2013
Genre:   Grime / Garage
Label:   Goon Club Allstars
Price:   10.00

$

处理记录中元素顺序可变、元素缺失或存在多余元素的情况,留作练习。为此,scan_tag()函数需要返回它找到的标记,而不是检查所给的标记是否正确。调用代码必须根据找到的内容进行调整,如果同一标记出现多次则报告错误,等等。

所示代码在报告错误方面做得不够。它能够发现错误,但不会报告具体是什么问题。有很多方法可以做到这一点;您必须决定哪种最适合您。

确保在交付之前了解整个代码中的内容;其中一些不是初学者写的东西。