使用getchar()读取制表符分隔文件

时间:2016-09-01 12:38:43

标签: c

我正在尝试使用输入重定向./program < file.txt读取文本文件,而我制作的文件如下所示:

Country\tSport\tGender\tMedal\n
America\tCycling\tMens\tGold\n
New Zealand\tSwimming\tWomens\tSilver\n
India\tBadminton\tMens\tbronze\n

这只是我读入的一些随机数据,如第一行所示共有4列,列标题分别为Country、Sport、Gender和Medal。

我在上面写出了\t和\n以使文件内容更易读,但文件中实际包含的是制表符和换行符。

我正在尝试读取该文件的每一行,并将它们存储在一个字符串数组中,我已将其声明为:

char *records[ROWS][COLUMNS];

我希望字符串数组records看起来像:

{{"Country", "Sport", "Gender", "Medal"}, {"America", "Cycling", "Mens", "Gold"}, 
{"New Zealand", "Swimming", "Womens", "Silver"}, {"India", "Badminton", "Mens", "Bronze"}}

到目前为止,我一直在使用scanf来读取这些行,如下所示:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ROWS 1000
#define COLS 30
#define MAX_CH 50

/*
 * Reads records of four whitespace-separated fields from stdin and echoes
 * each record, stopping at the first read that does not yield four fields.
 *
 * Note: %s stops at ANY whitespace, so a field such as "New Zealand" is
 * read as two separate fields; this approach only suits space-free columns.
 */
int
main(int argc, char *argv[]) {
    char *records[ROWS][COLS];

    char country[MAX_CH];
    char sport[MAX_CH];
    char gender[MAX_CH];
    char medal[MAX_CH];

    /* The width 49 is MAX_CH - 1: it leaves room for the terminating '\0'
       so an over-long token cannot overflow its buffer. Keep it in sync
       with MAX_CH. */
    while (scanf("%49s\t%49s\t%49s\t%49s\n", country, sport, gender, medal) == 4) {
        printf("%s %s %s %s\n", country, sport, gender, medal);
    }

    return 0;
}

我知道这行不通,因为国家名称New Zealand的两个单词之间有空格,而scanf的%s遇到空白字符就会停止读取。我的scanf方法也不够通用,因为它只适用于恰好4列的情况。

我可以改用getchar()来实现吗?我不确定如何使用getchar分析输入流中的每个字符,并根据制表符和换行符将相应的字符组合成字符串。

4 个答案:

答案 0 :(得分:2)

使用getchar()的伪代码:

while (ch = getchar()) is not EOF: // EOF = End of file
    if ch is not '\t' and ch is not '\n'
        save into current string
    else if ch is '\t'
        terminate current string
        increment column index
    else if ch is '\n'
        terminate current string
        reset column index
        increment row index

修改

getchar()的问题在于,只有读到下一个制表符时,您才能知道该字符串的长度。

因此,您要么先遍历一遍输入以得知字符串的长度,然后分配适当大小的内存,要么始终分配一个足够安全的内存量(最大字符串长度)。

然后在这两种方案中,您都可以使用strcat()来连接字符串,但您也可以通过索引直接访问char*或char[](字符串)中的各个char:

// with an array: individual characters are reachable by index
char string[] = "MINE"; // string[0] -> 'M'
string[0] = 'N'; // string -> "NINE"

// with dynamic memory allocation (check the result for NULL before use)
char *string = malloc(5 * sizeof *string);
string[0] = 'N';
string[1] = '\0'; // malloc'ed memory is uninitialised: terminate it, string -> "N"

答案 1 :(得分:1)

您可以按行读取,然后以'\t'作为分隔符调用strtok()。

Reference for strtok() (it's from cplusplus.com, but strtok is in string.h, so it works also in c)

答案 2 :(得分:1)

发布的代码包含几个问题:

  1. 未使用的堆栈变量records[][]
  2. 未使用的参数:argc
  3. 未使用的参数:argv
  4. 将所有输入数据都放入数组country[]、sport[]、gender[]、medal[]的第一个条目中,即所有其他条目均未使用
  5. 不处理包含任何空格的“列”
  6. 包含未使用这些内容的头文件:string.h
  7. 以下代码干净地编译并执行所需的功能:

    #include <stdio.h>
    #include <stdlib.h>
    //#include <string.h>

    #define ROWS 1000
    //#define COLS 30
    #define MAX_CH 49

    // Two-step stringification: STR(MAX_CH) expands to the string literal "49",
    // which can then be concatenated into the scanf format. Writing the bare
    // MAX_CH between string literals does not compile.
    #define STR_HELPER(x) #x
    #define STR(x) STR_HELPER(x)

    // One input line: four tab-separated text fields, each holding at most
    // MAX_CH characters plus the terminating '\0'.
    struct record
    {
        char country[ MAX_CH+1 ];
        char sport  [ MAX_CH+1 ];
        char gender [ MAX_CH+1 ];
        char medal  [ MAX_CH+1 ];
    };

    // Reads up to ROWS records from stdin and echoes each one; stops at the
    // first record that does not yield all four fields.
    int main( void )
    {
        struct record records[ROWS];

        for( size_t i=0; i< ROWS; i++)
        {
            // The leading space in each conversion skips whitespace, including
            // the tab or newline left behind by the previous field. The first
            // three fields end at a tab, the last one at the newline, so
            // embedded spaces are kept.
            if( scanf(" %" STR(MAX_CH) "[^\t] %" STR(MAX_CH) "[^\t] %" STR(MAX_CH) "[^\t] %" STR(MAX_CH) "[^\n]",
                    records[i].country,
                    records[i].sport,
                    records[i].gender,
                    records[i].medal) == 4)
            {
                printf("%s %s %s %s\n",
                        records[i].country,
                        records[i].sport,
                        records[i].gender,
                        records[i].medal);
            }

            else
            {
                break;
            }
        }

        return 0;
    } // end function: main
    

答案 3 :(得分:1)

getchar可用于读取行和列。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define ROWS 1000
#define COLS 30
#define MAX_CH 50

void print_array(char str[][COLS][MAX_CH], int nrows, int ncols, int col[], int reorder);
void sort_array(char str[][COLS][MAX_CH], int nrows, int ncols, int col[]);


/*
 * Reads a tab-separated table from redirected stdin one character at a time,
 * prints it, sorts the rows after the first by the columns named on the
 * command line, then prints it again with those columns first.
 *
 * argv[1..] are zero-based column numbers (0 to COLS - 1, no duplicates).
 * Exits with EXIT_FAILURE on bad arguments or when stdin is a terminal.
 */
int main( int argc, char *argv[])
{
    //static: ROWS * COLS * MAX_CH bytes is a lot to place on the stack;
    //static storage is also zero-initialised, so every element starts empty
    static char records[ROWS][COLS][MAX_CH];
    char item[MAX_CH] = { '\0'};//buffer for each item
    int row = 0;
    int col = 0;
    int usedcol = 0;
    int sortby[COLS + 1] = { 0};//one extra element holds the -1 terminator
    int ch = 0;
    int each = 0;
    int loop = 0;

    if ( argc < 2 || argc > COLS + 1) {
        fprintf ( stderr, "syntax is\n%s column0 [column1]...[column%d] < inputfile\n", argv[0], COLS - 1);
        exit(EXIT_FAILURE);
    }

    for ( each = 1; each <= COLS + 1; each++) {// +1 to get to extra element
        sortby[each - 1] = -1;//unused and extra elements stay -1
        if ( each < argc) {
            if ( sscanf ( argv[each], "%d", &sortby[each - 1]) != 1 || sortby[each - 1] < 0 || sortby[each - 1] >= COLS) {
                fprintf ( stderr, "syntax is\n%s column0 [column1]...[column%d] < inputfile\n", argv[0], COLS - 1);
                fprintf ( stderr, "column%d must be 0 to %d\n", each - 1, COLS - 1);
                exit(EXIT_FAILURE);
            }
        }
    }

    //reject a column that is named more than once
    for ( each = 0; each < argc - 1; each++) {
        for ( loop = 0; loop < argc - 1; loop++) {
            if ( loop != each && sortby[each] == sortby[loop]) {
                fprintf ( stderr, "found duplicate columns in args\n");
                exit(EXIT_FAILURE);
            }
        }
    }

    //isatty and fileno are POSIX (unistd.h / stdio.h), not ISO C
    if ( isatty ( fileno ( stdin))) {
        printf ( "expected that a file would be redirected to this program\n");
        printf ( "syntax is\n%s column0 [column1]...[column%d] < inputfile\n", argv[0], COLS - 1);
        exit(EXIT_FAILURE);
    }

    row = 0;
    col = 0;
    each = 0;
    while ( ( ch = getchar ( )) != EOF) {//read each character
        if ( ch == '\r') {//skip carriage return
            continue;
        }
        if ( ch == '\t') {//end of item
            strcpy ( records[row][col], item);
            item[0] = '\0';//clear so an empty next field is stored as empty
            each = 0;//back to first character
            col++;//next column
            if ( col >= COLS) {//row is full, wrap to the next row
                usedcol = COLS;
                col = 0;
                row++;
                if ( row >= ROWS) {
                    fprintf ( stderr, "too many rows\n");
                    break;
                }
            }
            continue;
        }
        if ( ch == '\n') {//end of item and end of row
            strcpy ( records[row][col], item);
            item[0] = '\0';
            col++;
            if ( col > usedcol) {//remember the widest row seen
                usedcol = col;
            }
            col = 0;//back to first column
            each = 0;//back to first character
            row++;//next row
            if ( row >= ROWS) {
                fprintf ( stderr, "too many rows\n");
                break;
            }
            continue;
        }
        item[each] = (char)ch;
        each++;//next character
        item[each] = '\0';//terminate with '\0'
        if ( each >= MAX_CH - 1) {//buffer is full
            fprintf ( stderr, "too many characters in item\n");
            strcpy ( records[row][col], item);//keep the part that fit
            item[0] = '\0';
            each = 0;
            col++;//the rest of the text continues in the next column
            if ( col >= COLS) {
                usedcol = COLS;
                col = 0;
                row++;
                if ( row >= ROWS) {
                    fprintf ( stderr, "too many rows\n");
                    break;
                }
            }
            continue;
        }
    }

    //input ended without a final newline: store the unfinished row too
    if ( row < ROWS && ( each > 0 || col > 0)) {
        strcpy ( records[row][col], item);
        col++;
        if ( col > usedcol) {
            usedcol = col;
        }
        row++;
    }

    print_array ( records, row, usedcol, sortby, 0);

    sort_array ( records, row, usedcol, sortby);

    print_array ( records, row, usedcol, sortby, 1);

    return 0;
}

/*
 * Prints the first nrows rows of str, ncols fields per row, each field
 * left-justified in a 12-character column, followed by one blank line.
 *
 * When reorder is zero the columns are printed in their stored order.
 * Otherwise the columns listed in col[] (a list padded with -1) come first
 * and the remaining column numbers follow in ascending order.
 */
void print_array(char str[][COLS][MAX_CH], int nrows, int ncols, int col[], int reorder) {
    int order[COLS] = { 0};
    int slot = 0;
    int wanted = 0;
    int r = 0;
    int c = 0;

    //start from the caller's list, or from the identity mapping
    for ( slot = 0; slot < COLS; slot++) {
        order[slot] = reorder ? col[slot] : slot;
    }

    //give every column number not yet listed the first free (-1) slot
    for ( wanted = 0; reorder && wanted < COLS; wanted++) {
        slot = 0;
        while ( slot < COLS && order[slot] != wanted && order[slot] != -1) {
            slot++;
        }
        if ( slot < COLS && order[slot] == -1) {
            order[slot] = wanted;
        }
    }

    for ( r = 0; r < nrows; r++) {
        for ( c = 0; c < ncols; c++) {
            printf("%-12s ", str[r][order[c]]);
        }
        printf("\n");
    }
    printf("\n");
}

/*
 * Sorts the rows of str into ascending strcmp order, leaving row 0 in place.
 *
 * col[] lists the key columns in priority order and must end with -1; a
 * later key is consulted only when all earlier keys compare equal.
 * Adjacent rows are exchanged in repeated passes until a pass makes no
 * exchange. Only the first ncols fields of a row are exchanged.
 */
void sort_array(char str[][COLS][MAX_CH], int nrows, int ncols, int col[]) {
    char held[MAX_CH] = { '\0'};
    int upper = 0;
    int key = 0;
    int field = 0;
    int verdict = 0;
    int moved = 0;

    do {
        moved = 0;
        for ( upper = 1; upper < nrows - 1; upper++) {//start at 1 to skip the first row
            //compare key by key until one differs or the -1 terminator is reached
            verdict = 0;
            for ( key = 0; verdict == 0 && col[key] != -1; key++) {
                verdict = strcmp ( str[upper][col[key]], str[upper + 1][col[key]]);
            }
            if ( verdict <= 0) {//already in order, or equal on every key
                continue;
            }
            for ( field = 0; field < ncols; field++) {//exchange the two rows field by field
                strcpy ( held, str[upper][field]);
                strcpy ( str[upper][field], str[upper + 1][field]);
                strcpy ( str[upper + 1][field], held);
            }
            moved = 1;
        }
    } while ( moved);//repeat until a pass makes no exchange
}