Question

我最近开始在大学学习C编程（初级课程），现在我们正在做有关患者数据库的期末考试。

我需要将数据从文本文件读取到struct数组（大小为10000）。该文件包含2个字符串数组（个人识别字符串（用'-'分隔的10个数字）和名称字符串），1个包含照片参考的int数组和1个包含每个患者的照片参考数量的整数。我已经尝试过fscanf，但是只要我尝试读取，该程序就会挂起，当我使用fgets时，它会读取整行，并将整数从photo引用数组存储到我的名称数组中（中间一个）。我想知道应该如何去做，我花了很多天试图找出解决方案，但似乎没有任何效果。这是我的文本文件的样子：

123456-1234   Name Name     [1, 2, 3, 4]
234567-2345   Name2 Name2   [1, 2]
345678-3456   Name3 Name3   []

这是我的write_to_file函数，该函数在程序退出时写入文件：

void write_to_file(Patient reg[], int *pNr_of_patients){
FILE *fp;
fp=fopen("file.txt","w");
if(*pNr_of_patients>0){
    int i,j;
    for(i=0;i<*pNr_of_patients;i++){
        fprintf(fp,"%s\t%s\t[",reg[i].pers_nr,reg[i].name);
        for(j=0;j<reg[i].nr_of_ref-1;j++){
            fprintf(fp,"%d, ",reg[i].photo_ref[j]);
        }
        if(reg[i].photo_ref[j]==0){
            fprintf(fp,"]");
        }else{
            fprintf(fp,"%d]",reg[i].photo_ref[j]);
        }
        fprintf(fp,"\n");
    }
    fclose(fp);
}
}

这是我的read_from_file函数，缺少用于读取结尾的int数组值的代码：

编辑：我添加了一个for循环，以从名称字符串中删除以“ [”开头的字符，现在我只需要知道如何将结构末尾的数组值读取到结构的图片引用数组中即可。

void read_from_file(Patient reg[],int *pNr_of_patients){
FILE *fp;
fp=fopen("file.txt","r");
if(fp!=NULL){
    reg[*pNr_of_patients].nr_of_ref=0;
    int i=0, pos;
    while(fgets(reg[*pNr_of_patients].pers_nr,13,fp)!=NULL){
        reg[*pNr_of_patients].pers_nr[strlen(reg[*pNr_of_patients].pers_nr)-1]='\0';
        fgets(reg[*pNr_of_patients].name,31,fp);
        reg[*pNr_of_patients].name[strlen(reg[*pNr_of_patients].name)-1]='\0';
        for(pos=0;pos<30;pos++){
            if(reg[*pNr_of_patients].name[pos]=='['){
                reg[*pNr_of_patients].name[pos]='\0';
            }
        }
        (*pNr_of_patients)++;
    }
    fclose(fp);
}else{
  printf("File does not exist\n");  
}
}

这是我的Patient结构的样子：

struct patient{
char pers_nr[12], name[30];
int photo_ref[10], nr_of_ref;
};
typedef struct patient Patient;

在main中调用read_from_file：

int main(void){
Patient patient_register[10000];
int nr_of_patients=0;
read_from_file(patient_register,&nr_of_patients);
database_management(patient_register,&nr_of_patients); //this is where I fill all the data into the array before writing to the file at the end
write_to_file(patient_register,&nr_of_patients);
return 0;

}

Answer 1

我认为扫描输入是C语言中最难的一种。这就是为什么存在cs50之类的库，以简化新C用户阅读输入的原因。无论如何，我构建了解决方案，但重新设计了您的功能。

第一个解决方案从一行中读取一个Patient。它不使用sscanf来设置errno的唯一标准调用是对strtol的调用，该调用用于转换数字。
第二个函数使用sscanf和一些疯狂的格式字符串构造来保持缓冲区溢出的安全。
一切都取决于输入流的构造方式以及您对它的信任程度。

#include <stdio.h>
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>

struct patient{
    char pers_nr[12];
    char name[30];
    int photo_ref[10];
    size_t nr_of_ref;
};

typedef struct patient Patient;

int patient_read_from_line_1(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);

    // check the first 12 characters ----------
    // first 6 chars must be numbers
    for (int i = 0; i < 6; ++i) {
        if (!isdigit(line[i])) {
            return -__LINE__;
        }
    }
    // followed by a single '-'
    if (line[6] != '-') {
        return -__LINE__;
    }
    // followed by 4 numbers
    for (int i = 7; i < 7 + 4; ++i) {
        if (!isdigit(line[i])) {
            return -__LINE__;
        }
    }
    // followed by a space
    if (line[7 + 4] != ' ') {
        return -__LINE__;
    }
    // read up first field ---------------------
    // cool first field checks out
    memcpy(p->pers_nr, line, 11);
    p->pers_nr[11] = '\0';

    line += 12;
    // let's omit spaces
    while (line[0] == ' ') {
        line++;
    }

    // read up second field --------------------------
    // now we should read a two strings separated by a space
    // so we should read up until a second space
    if (!isalpha(*line)) {
        return -__LINE__;
    }
    const char *pnt_first_space = strchr(line, ' ');
    if (pnt_first_space == NULL) {
        return -__LINE__;
    }
    const char *pnt_another_space = strchr(pnt_first_space + 1, ' ');
    if (pnt_another_space == NULL) {
        return -__LINE__;
    }
    const size_t name_to_read_length = pnt_another_space - line;
    if (name_to_read_length > sizeof(p->name)) {
        return -__LINE__;
    }
    memcpy(p->name, line, name_to_read_length);
    p->name[name_to_read_length] = '\0';

    // buh two fields done, now the array
    line += name_to_read_length;
    // let's omit the spaces
    while (line[0] == ' ') {
        line++;
    }

    // read up array -----------------------------------
    // array
    if (line[0] != '[') {
        return -__LINE__;
    }
    line++;
    for (size_t numscnt = 0;; ++numscnt) {
        if (numscnt >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
            return -__LINE__;
        }
        char *pnt;
        errno = 0;
        long num = strtol(line, &pnt, 10);
        if (errno) {
            return -__LINE__;
        }
        if (!(INT_MIN < num && num < INT_MAX)) {
            return -__LINE__;
        }
        p->photo_ref[numscnt] = num;

        line = pnt;
        // omit spaces
        while (*line == ' ') line++;
        // now we should get a comma
        if (line[0] != ',') {
            // if don't get a comma, we need to get a ]
            if (line[0] == ']') {
                // cool
                ++line;
                // but remember to save the count
                p->nr_of_ref = numscnt + 1;
                // cool
                break;
            }
            return -__LINE__;
        }
        ++line;
        // omit spaces
        while (*line == ' ') line++;
        // start again
    }
    // this needs to be end of line or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }
    // success!
    return 0;
}

// ok, ok, ok, let's use sscanf
int patient_read_from_line_2(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);
    int ret;
    int pos;

    // read up first fiedl and half of the second ------------------
    ret = sscanf(line, "%12s %30[^ ] %n", p->pers_nr, p->name, &pos);
    if (ret != 2) {
        return -__LINE__;
    }
    line += pos;

    // read up another half of the second field -------------------
    const size_t cur_name_len = strlen(p->name);
    p->name[cur_name_len] = ' ';
    char tmp[20];
    ret = snprintf(tmp, 20, "%%%d[^ ] [%%n", (int)(sizeof(p->name) - cur_name_len - 1));
    if (ret < 0) {
        return -__LINE__;
    }
    ret = sscanf(line, tmp, &p->name[cur_name_len + 1], &pos);
    if (ret != 1) {
        return -__LINE__;
    }
    line += pos;

    // read up array *sigh* ------------------------------------------- 
    for (p->nr_of_ref = 0;; ++p->nr_of_ref) {
        if (p->nr_of_ref >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
            return -__LINE__;
        }

        ret = sscanf(line, " %d%1s%n", &p->photo_ref[p->nr_of_ref], tmp, &pos);
        if (ret == 0) {
            // hm...
            if (line[0] == ']') {
                // ach all ok, empty numbers list;
                line++;
                p->nr_of_ref++;
                break;
            }
            return -__LINE__;
        }
        if (ret != 2) {
            return -__LINE__;
        }
        line += pos;
        if (tmp[0] != ',') {
            if (tmp[0] == ']') {
                // whoa!  success
                p->nr_of_ref++;
                // cool
                break;
            }
            return -__LINE__;
        }
    }

    // so what's left? - EOF or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }

    // success!
    return 0;
}

long patient_read_from_file(FILE *fp, Patient patients[], size_t patients_len)
{
    size_t patients_cnt = 0;

    char line[256];
    // for each line in file
    while (fgets(line, sizeof(line), fp) != NULL) {

        const int ret = patient_read_from_line_2(line, &patients[patients_cnt]);
        if (ret < 0) {
            // hanle reading error 
            return ret;
        }

        patients_cnt++;
        if (patients_cnt > patients_len) {
            // no more memory in patients left
            return -__LINE__;
        }

    }

    return patients_cnt;
}

void patient_fprintln(FILE *f, const Patient *p)
{
    fprintf(f, "%s %s [", p->pers_nr, p->name);
    for (size_t i = 0; i < p->nr_of_ref; ++i) {
        fprintf(f, "%d", p->photo_ref[i]);
        if (i + 1 != p->nr_of_ref) {
            fprintf(f, ",");
        }
    }
    fprintf(f, "]\n");
}

int main()
{
    FILE *fp;
    fp = stdin; // fopen("file.txt","r");
    if (fp == NULL) {
        return -__LINE__;
    }

    Patient patients[3];
    const long patients_cnt = patient_read_from_file(fp, patients, sizeof(patients)/sizeof(*patients));
    if (patients_cnt < 0) {
        fprintf(stderr, "patient_read_from_file error %ld\n", patients_cnt);
        return patients_cnt;
    }

    fclose(fp);

    printf("Readed %d patients:\n", patients_cnt);
    for (size_t i = 0; i < patients_cnt; ++i) {
        patient_fprintln(stdout, &patients[i]);
    }

    return 0;
}

可通过onlinedbg获得实时版本。

可以将其简化为100％。这有100％的错误。这只是为了说明人们有时使用什么方法（strtol，memcpy，sscanf，isdigit，isalpha）来读取输入内容。另外，我将长度修饰符指定为scanf（sscanf(..., "%12s"）来处理溢出（希望如此）。尝试始终检查scanf和其他标准函数的返回值（也许检查snprintf的返回值太多了，但是，让我们保持一致）。有所不同，在某些平台上%n scanf修饰符碰巧不起作用。也可以将其构建为使用malloc，realloc和free使用动态分配，这两种方式都是在线读取（基本上等于编写GNU getline的自定义版本），从输入，从患者的输入和动态分配中读取int的数组。

Answer 2

这只是评论，但太长了，所以我在这里输入。

read_from_file（）看起来过于复杂。您可能考虑重新访问fscanf，将照片参考作为一个整体读取，然后解析为可分配给photo_ref数组的整数。（虽然下面的代码可以编译，但我尚未验证它是否可以工作。这只是一个如何进行的想法。）

void read_from_file (Patient reg[], int *pNr_of_patients)
{
  FILE *fp;
  fp = fopen ("file.txt", "r");
  if (fp != NULL)
    {
      int n;
      int i = 0;        // position in photo_ref
      char refs[30];
      *pNr_of_patients = 0;
      while (EOF !=
         (n =
          fscanf (fp, "%s %[^[]%[^]]]", reg[*pNr_of_patients].pers_nr,
              reg[*pNr_of_patients].name, refs)))
    {
      // btw, reg[*pNr_of_patients].name may contain terminating blanks. right trim it. that's easy enough.
      if (n > 2)
        { /* found photo refs.Now split the string into integers */
          char *s = refs + 1;   //skip '['
          char *p;
          while (*s && i<10){       // scan for the integers, 10 of them
            while (*s && *s == ' ')
              s++;  // skip blanks
            p = s;  // mark start of number
            while (*p && *p != ',')
              p++;
            if (*p == ',')
              *p = 0;
            reg[*pNr_of_patients].photo_ref[i++] = atoi (s);    //tip: use strtol(3), verify that `i' isnt larger than size of the array
            s = p + 1;  // skip ','. Must Do: verify that `s' hasnt yet moved past the end of `ref'!!
          }
        }
      (*pNr_of_patients)++;
    }
      fclose (fp);
    }
  else
    {
      printf ("File does not exist\n");
    }
}

Answer 3

Divide and Conquer

将此细分为几个步骤。创建一个填充1 Patient的函数。

以下是未经测试的代码。认为这是一个起点。明确的目标是使一个函数将1条 line 读入 1 Patient。

阅读1条完整的行

// return 1: success, 0: failure EOF:end-of-file
int read_once_from_file(FILE *stream, Patient *pat_ptr) {
  Patient pat = { 0 };
  char buffer[100 + 30*13];
  if (fgets(buffer, sizeof buffer, stream) == NULL) {
    return EOF;
  }

解析第一部分。使用"%n"来记录解析偏移量。在字符串输入上使用宽度限制。

  int n = 0;
  if (sscanf(buffer, " %11[^\t] %29[^\t] [ %n", pat.pers_nr, pat.name) != 2) {
    return 0; // improper formatted input
  }
  char *p = buffer + n;

现在寻找']'和photo_ref

  if (*p != ']') {
    for (pat.nr_of_ref=0; ; pat.nr_of_ref++) {
      if (sscanf(p, "%d %n", &pat.photo_ref[i], &n) != 1) {
        return 0; // improper formatted input
      }
      p += n;
      if (*p == ']') {
        pat.nr_of_ref++;
        break;
      }
      if (*p != ',' || pat.nr_of_ref + 1 == 10) {
        return 0; // improper formatted input
      }
      p++;
    }
  }

保存结果

  *pat_ptr = pat;
  return 1;
}

根据需要致电read_once_from_file()

void read_from_file(Patient reg[],int *pNr_of_patients){
  *pNr_of_patients = 0;
  FILE *fp = fopen("file.txt","r");
  if(fp){
    for (int i = 0; i<10000; i++) {
      int count = read_once_from_file(fp, &reg[i]);
      if (count ==  EOF) {
        break;
      }
      if (count != 1) {
        // error
        fprintf(stderr, "Input error\n"); 
        break;
      }
    } 
    *pNr_of_patients = i;
    fclose(fp);
  }
}

Answer 4

已经有一些不错的答案，但是大多数答案都试图使用一种方法来解析该行的所有元素。我先将整行读入缓冲区，然后使用sscanf()解析患者编号和姓名，但使用strtok()将数组拆分为各个部分：

void read_from_file(Patient reg[], int *pNr_of_patients) {
    FILE *fp = fopen("file.txt", "r");
    if (!fp) {
        fprintf(stderr, "Error opening file: %s\n", strerror(errno));
        *pNr_of_patients = 0;
        return;
    }

    char line[1024];
    int i = 0;

    while (fgets(line, sizeof line, fp)) {
        int offset = 0;
        int refs = 0;

        sscanf(line, "%11s %29[^[] [%n", &reg[i].pers_nr, &reg[i].name, &offset);

        for (char *tok = strtok(line + offset, ","); tok && refs < 10; tok = strtok(NULL, ",")) {
            if (*tok != ']')
                reg[i].photo_ref[refs++] = atoi(tok);
        }
        reg[i].nr_of_ref = refs;
        i++;
    }

    *pNr_of_patients = i;
}

从文本文件读取到结构数组时遇到问题

4 个答案: