c - 从文本文件读取到结构数组时遇到麻烦

我最近从大学开始从事C编程（入门课程），现在我们正在做有关患者数据库的最终检查。

我需要将数据从文本文件读取到struct数组（大小为10000）。该文件包含2个字符串（个人识别号字符串（10个数字，中间以'-'分隔）和姓名字符串）、1个存放照片引用的int数组，以及1个表示每个患者照片引用数量的整数。我已经尝试过fscanf，但只要一尝试读取，程序就会挂起；而使用fgets时，它会读取整行，并把照片引用数组中的整数也存进了我的姓名数组（即中间那个字段）。我想知道应该怎么做，我花了很多天试图找出解决方案，但似乎都不奏效。这是我的文本文件的样子：

123456-1234   Name Name     [1, 2, 3, 4]
234567-2345   Name2 Name2   [1, 2]
345678-3456   Name3 Name3   []

这是我的write_to_file函数，该函数在程序退出时写入文件：

/*
 * Write the whole patient register to "file.txt", one patient per line:
 *
 *     <pers_nr>\t<name>\t[ref, ref, ...]
 *
 * reg              array of patients to write
 * pNr_of_patients  points to the number of valid entries in reg
 *
 * The file is opened in "w" mode, so it is truncated even when there are no
 * patients. If it cannot be opened, a message is printed via perror() and
 * nothing is written.
 */
void write_to_file(Patient reg[], int *pNr_of_patients){
    FILE *fp = fopen("file.txt", "w");
    if (fp == NULL) {
        perror("file.txt");
        return;
    }

    for (int i = 0; i < *pNr_of_patients; i++) {
        fprintf(fp, "%s\t%s\t[", reg[i].pers_nr, reg[i].name);
        /* nr_of_ref decides how many references are printed, so an empty
         * list never reads photo_ref[0] and a stored 0 is still written. */
        for (int j = 0; j < reg[i].nr_of_ref; j++) {
            fprintf(fp, "%s%d", j > 0 ? ", " : "", reg[i].photo_ref[j]);
        }
        fprintf(fp, "]\n");
    }

    /* Close on every path, including the "no patients" case. */
    fclose(fp);
}

这是我的read_from_file函数，缺少用于读取结尾的int数组值的代码：

编辑：我添加了一个for循环，用来把名称字符串中从“[”开始的字符截掉，现在我只需要知道如何把每行末尾的数组值读取到结构的照片引用数组中即可。

/*
 * Load patients from "file.txt" into reg, appending at index
 * *pNr_of_patients and incrementing it once per record read.
 *
 * Expected line layout (fields separated by blanks or tabs):
 *
 *     <pers_nr>  <name words...>  [ref, ref, ...]
 *
 * Lines with no personal number or no '[' are skipped. A name longer than
 * the name buffer is truncated; references beyond the capacity of
 * photo_ref are ignored.
 *
 * NOTE(review): the capacity of reg is not passed in, so the caller must
 * guarantee the array is large enough for every line in the file.
 */
void read_from_file(Patient reg[],int *pNr_of_patients){
    FILE *fp = fopen("file.txt", "r");
    if (fp == NULL) {
        printf("File does not exist\n");
        return;
    }

    char line[256];
    while (fgets(line, sizeof line, fp) != NULL) {
        Patient *p = &reg[*pNr_of_patients];
        int used = 0;

        /* Field 1: personal number. The width 11 leaves room for the
         * terminating '\0' in the 12-byte pers_nr buffer. */
        if (sscanf(line, "%11s%n", p->pers_nr, &used) != 1) {
            continue;               /* blank line */
        }

        /* The photo list starts at the first '[' after the personal number. */
        const char *bracket = strchr(line + used, '[');
        if (bracket == NULL) {
            continue;               /* malformed line */
        }

        /* Field 2: everything between the personal number and '[',
         * with surrounding blanks and tabs trimmed. */
        const char *name_begin = line + used;
        while (*name_begin == ' ' || *name_begin == '\t') {
            name_begin++;
        }
        const char *name_end = bracket;
        while (name_end > name_begin &&
               (name_end[-1] == ' ' || name_end[-1] == '\t')) {
            name_end--;
        }
        size_t name_len = (size_t)(name_end - name_begin);
        if (name_len >= sizeof p->name) {
            name_len = sizeof p->name - 1;
        }
        memcpy(p->name, name_begin, name_len);
        p->name[name_len] = '\0';

        /* Field 3: comma-separated integers up to ']'. sscanf stops
         * matching at ']' (or any other non-number), ending the loop. */
        const int max_refs = (int)(sizeof p->photo_ref / sizeof p->photo_ref[0]);
        const char *cur = bracket + 1;
        int value;
        p->nr_of_ref = 0;
        while (p->nr_of_ref < max_refs &&
               sscanf(cur, " %d%n", &value, &used) == 1) {
            p->photo_ref[p->nr_of_ref++] = value;
            cur += used;
            while (*cur == ' ' || *cur == ',') {
                cur++;
            }
        }

        (*pNr_of_patients)++;
    }
    fclose(fp);
}

这是我的Patient结构的样子：

/* One entry of the patient register. */
typedef struct patient {
    char pers_nr[12];   /* personal number text, e.g. "123456-1234", plus '\0' */
    char name[30];      /* patient name as a C string */
    int photo_ref[10];  /* photo reference numbers */
    int nr_of_ref;      /* how many entries of photo_ref are in use */
} Patient;

在main中调用read_from_file：

/*
 * Program entry point: load the register from disk, run the interactive
 * database management, then write the register back before exiting.
 */
int main(void){
    /* static keeps this large array (10000 records) off the stack, where it
     * could exceed the default stack size on some platforms. It also
     * zero-initialises every record. */
    static Patient patient_register[10000];
    int nr_of_patients = 0;

    read_from_file(patient_register, &nr_of_patients);
    database_management(patient_register, &nr_of_patients); //this is where I fill all the data into the array before writing to the file at the end
    write_to_file(patient_register, &nr_of_patients);
    return 0;
}

最佳答案

我认为解析输入是C语言中最难的事情之一。这也是cs50之类的库存在的原因：它们简化了C语言新手读取输入的过程。无论如何，我给出了自己的解决方案，不过我重新设计了您的函数。

第一个函数从一行文本中读取单个Patient。它不使用sscanf；其中唯一会设置errno的标准库调用是strtol，用于转换数字。
第二个函数使用sscanf和一些疯狂的格式字符串构造来保持缓冲区溢出的安全。
所有这些都归结为如何构造输入流以及您对它的信任程度。

#include <stdio.h>
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>

// One patient record as stored in the input file.
typedef struct patient {
    char pers_nr[12];    // personal number text (11 characters) plus '\0'
    char name[30];       // patient name as a C string
    int photo_ref[10];   // photo reference numbers
    size_t nr_of_ref;    // number of entries of photo_ref in use
} Patient;

/**
 * Parse one text line of the form
 *
 *     DDDDDD-DDDD <word> <word> [n, n, ...]
 *
 * into *p without using sscanf.
 *
 * @param line  NUL-terminated input line; a trailing '\n' is accepted
 * @param p     destination record; its contents are unspecified on failure
 * @return 0 on success, otherwise a negative value identifying the check
 *         that failed (the negated source line number of that check)
 *
 * An empty list "[]" is valid and yields nr_of_ref == 0.
 */
int patient_read_from_line_1(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);

    // check the first 12 characters ----------
    // <ctype.h> functions need an unsigned char value, hence the casts
    // first 6 chars must be digits
    for (int i = 0; i < 6; ++i) {
        if (!isdigit((unsigned char)line[i])) {
            return -__LINE__;
        }
    }
    // followed by a single '-'
    if (line[6] != '-') {
        return -__LINE__;
    }
    // followed by 4 digits
    for (int i = 7; i < 7 + 4; ++i) {
        if (!isdigit((unsigned char)line[i])) {
            return -__LINE__;
        }
    }
    // followed by a space
    if (line[7 + 4] != ' ') {
        return -__LINE__;
    }
    // first field checks out, store it ---------
    memcpy(p->pers_nr, line, 11);
    p->pers_nr[11] = '\0';

    line += 12;
    // skip the padding spaces
    while (line[0] == ' ') {
        line++;
    }

    // read the second field --------------------------
    // the name is two words separated by one space, so it ends at the
    // second space
    if (!isalpha((unsigned char)*line)) {
        return -__LINE__;
    }
    const char *pnt_first_space = strchr(line, ' ');
    if (pnt_first_space == NULL) {
        return -__LINE__;
    }
    const char *pnt_another_space = strchr(pnt_first_space + 1, ' ');
    if (pnt_another_space == NULL) {
        return -__LINE__;
    }
    const size_t name_to_read_length = (size_t)(pnt_another_space - line);
    // >= because one byte must remain for the terminating '\0'
    if (name_to_read_length >= sizeof(p->name)) {
        return -__LINE__;
    }
    memcpy(p->name, line, name_to_read_length);
    p->name[name_to_read_length] = '\0';

    // two fields done, now the array
    line += name_to_read_length;
    while (line[0] == ' ') {
        line++;
    }

    // read the array -----------------------------------
    if (line[0] != '[') {
        return -__LINE__;
    }
    line++;
    while (*line == ' ') {
        line++;
    }

    size_t numscnt = 0;
    if (line[0] == ']') {
        // empty list: nothing to convert
        ++line;
    } else {
        for (;;) {
            if (numscnt >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
                return -__LINE__;
            }
            char *pnt;
            errno = 0;
            const long num = strtol(line, &pnt, 10);
            if (pnt == line) {
                // strtol consumed nothing: there was no number here
                return -__LINE__;
            }
            if (errno) {
                return -__LINE__;
            }
            if (num < INT_MIN || num > INT_MAX) {
                return -__LINE__;
            }
            p->photo_ref[numscnt++] = (int)num;

            line = pnt;
            while (*line == ' ') {
                line++;
            }
            // after a number comes either ']' (done) or ',' (more follow)
            if (line[0] == ']') {
                ++line;
                break;
            }
            if (line[0] != ',') {
                return -__LINE__;
            }
            ++line;
            while (*line == ' ') {
                line++;
            }
        }
    }
    p->nr_of_ref = numscnt;

    // this needs to be end of string or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }
    // success!
    return 0;
}

// ok, ok, ok, let's use sscanf
/**
 * Parse one text line of the form
 *
 *     <pers_nr> <word> <word> [n, n, ...]
 *
 * into *p using sscanf with bounded field widths.
 *
 * @param line  NUL-terminated input line; a trailing '\n' is accepted
 * @param p     destination record; its contents are unspecified on failure
 * @return 0 on success, otherwise a negative value identifying the check
 *         that failed (the negated source line number of that check)
 *
 * An empty list "[]" is valid and yields nr_of_ref == 0.
 */
int patient_read_from_line_2(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);
    int ret;
    int pos;

    // read the first field and the first word of the name -----------
    // A scanf width counts characters WITHOUT the terminating '\0', so each
    // width is one less than the buffer size (pers_nr[12], name[30]).
    // pos starts at -1 so we can tell whether %n was actually reached.
    pos = -1;
    ret = sscanf(line, "%11s %29[^ ] %n", p->pers_nr, p->name, &pos);
    if (ret != 2 || pos < 0) {
        return -__LINE__;
    }
    line += pos;

    // read the second word of the name -------------------------------
    const size_t cur_name_len = strlen(p->name);
    // we still need room for ' ', at least one character and the '\0'
    if (cur_name_len + 2 >= sizeof(p->name)) {
        return -__LINE__;
    }
    char tmp[20];
    ret = snprintf(tmp, sizeof(tmp), "%%%d[^ ] [%%n",
                   (int)(sizeof(p->name) - cur_name_len - 2));
    if (ret < 0 || (size_t)ret >= sizeof(tmp)) {
        return -__LINE__;
    }
    p->name[cur_name_len] = ' ';
    pos = -1;
    ret = sscanf(line, tmp, &p->name[cur_name_len + 1], &pos);
    if (ret != 1 || pos < 0) {
        // pos < 0 means the '[' after the name was missing;
        // keep the name a valid string even though parsing failed
        p->name[cur_name_len] = '\0';
        return -__LINE__;
    }
    line += pos;

    // read the array -------------------------------------------------
    p->nr_of_ref = 0;
    const char *peek = line;
    while (*peek == ' ') {
        peek++;
    }
    if (*peek == ']') {
        // empty list: no numbers stored
        line = peek + 1;
    } else {
        for (;;) {
            if (p->nr_of_ref >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
                return -__LINE__;
            }

            // a number followed by a one-character delimiter (',' or ']')
            pos = -1;
            ret = sscanf(line, " %d%1s%n", &p->photo_ref[p->nr_of_ref], tmp, &pos);
            if (ret != 2 || pos < 0) {
                return -__LINE__;
            }
            line += pos;
            p->nr_of_ref++;

            if (tmp[0] == ']') {
                break;
            }
            if (tmp[0] != ',') {
                return -__LINE__;
            }
        }
    }

    // so what's left? - end of string or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }

    // success!
    return 0;
}

/**
 * Read patients from fp, one per line, into patients[].
 *
 * @param fp            stream to read from
 * @param patients      destination array
 * @param patients_len  number of elements available in patients
 * @return the number of patients read, or a negative value when a line
 *         fails to parse or the stream holds more lines than patients_len
 */
long patient_read_from_file(FILE *fp, Patient patients[], size_t patients_len)
{
    size_t patients_cnt = 0;

    char line[256];
    // for each line in file
    while (fgets(line, sizeof(line), fp) != NULL) {

        // check capacity BEFORE parsing so we never write past the array
        if (patients_cnt >= patients_len) {
            // no more memory in patients left
            return -__LINE__;
        }

        const int ret = patient_read_from_line_2(line, &patients[patients_cnt]);
        if (ret < 0) {
            // propagate the parse error to the caller
            return ret;
        }

        patients_cnt++;
    }

    return (long)patients_cnt;
}

// Print one patient to f as "<pers_nr> <name> [a,b,c]" followed by a newline.
void patient_fprintln(FILE *f, const Patient *p)
{
    // empty before the first number, "," before every later one
    const char *separator = "";

    fprintf(f, "%s %s [", p->pers_nr, p->name);
    for (size_t idx = 0; idx < p->nr_of_ref; idx++) {
        fprintf(f, "%s%d", separator, p->photo_ref[idx]);
        separator = ",";
    }
    fputs("]\n", f);
}

// Demo driver: read up to 3 patients from stdin and print them back.
int main()
{
    FILE *fp;
    fp = stdin; // fopen("file.txt","r");
    if (fp == NULL) {
        return EXIT_FAILURE;
    }

    Patient patients[3];
    const long patients_cnt = patient_read_from_file(fp, patients, sizeof(patients)/sizeof(*patients));

    // the stream is no longer needed, whether or not reading succeeded
    fclose(fp);

    if (patients_cnt < 0) {
        fprintf(stderr, "patient_read_from_file error %ld\n", patients_cnt);
        return EXIT_FAILURE;
    }

    // patients_cnt is a long, so it must be printed with %ld
    printf("Readed %ld patients:\n", patients_cnt);
    for (long i = 0; i < patients_cnt; ++i) {
        patient_fprintln(stdout, &patients[i]);
    }

    return 0;
}

在线可运行版本位于onlinegdb。

这段代码百分之百还可以简化，也百分之百还有bug。它只是为了演示人们有时会用哪些方法（strtol、memcpy、sscanf、isdigit、isalpha）从输入中读取内容。另外，我为scanf指定了字段宽度（例如sscanf(..., "%12s")）来防止缓冲区溢出（希望如此）。请尽量始终检查scanf和其他标准函数的返回值（检查snprintf的返回值也许有点过头，但让我们保持一致）。需要注意的是，在某些平台上，scanf的%n修饰符可能无法正常工作。此外，这段代码还可以改造成使用malloc、realloc和free进行动态分配：既可以用于按行读取（基本上等于自己编写一个GNU getline），也可以用于从输入中读取字符串、读取int数组，以及为患者数组动态分配内存。

关于c - 从文本文件读取到结构数组时遇到麻烦，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/52652687/