如何扫描文本文件的单词和单词计数?

时间:2016-03-06 15:20:35

标签: c file text word-count

如何扫描单词的文本文件并在每次扫描时增加计数?

我的程序main将两个关键字和一些文件作为命令行参数。在命令行中作为参数传递的每个文件将包含最多160个字符的消息列表。 例如:

$ ./main Hello Bye data1.txt data2.txt

将输出

3 messages containing Hello and 1 messages containing Bye
2 messages containing Hello and 2 messages containing Bye

在main.c中的do_file方法中,stats_add_data()在调用时不起作用,我不知道为什么。我检查了指针,看起来是正确的。

void do_file(char *filename, char *key1, char *key2) {

    stats *newstats;
    newstats = stats_create(key1, key2);

    FILE *f = fopen(filename, "r");
    char *keyword1 = newstats->key1;
    char *keyword2 = newstats->key2;

    int line_num = 1;
    int find_result1 = 0;
    int find_result2 = 0;
    char temp[160];

    const char delimter[1] = " ";
    char onewordtoken[160];

    // Get a line, up to 160 chars from file
    while(fgets(temp, 160, f) != NULL) {
        if((strstr(temp, key1)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result1++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        if((strstr(temp, key2)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result2++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        line_num++;
    }

    if((find_result1 == 0) && (find_result2 == 0)) {
        printf("\nSorry, couldn't find a match.\n");
    }
}

参考代码:

stats.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "stats.h"

// The stats_create function should allocate and return a new stats structure, storing key1 and key2 in the structâs string fields and setting the two integers to 0.
stats *stats_create(char *key1, char *key2) {
    stats *new = malloc(sizeof *new);
    new->key1 = key1;
    new->key2 = key2;
    new->int1 = 0;
    new->int2 = 0;
    return new;
}

// The stats_add_data function should update the structure pointed to by s against the value val. That is, if val contains the first keyword, the integer containing the count of the number of messages containing that keyword should be updated. Similarly for the second keyword.
void stats_add_data(struct stats *s, char *val) {
    int count1 = 0;
    int count2 = 0;
    char* key1 = s->key1;
    char* key2 = s->key2;

    if (strcmp(s->key1, val) == 0) {
        count1++;
    }
    if (strcmp(s->key2, val) == 0) {
        count2++;
    }

    count1 = s->int1;
    count2 = s->int2;
}

// The stats_print function should print on one line the number of messages containing the first keyword, and the number of keywords containing the second keyword. E.g. 2 messages containing Homeland and 2 messages containing Elementary.

void stats_print(struct stats *s) {
    int count1 = s->int1;
    int count2 = s->int2;
    char* key1 = s->key1;
    char* key2 = s->key2;

    printf("%d", count1);
    printf(" messages containing ");
    printf("%s", key1);
    printf(" and ");
    printf("%d", count2);
    printf(" messages containing ");
    printf("%s", key2);
}

// The stats_free function should free the given stats structure and any memory that might have been allocated for it.
void stats_free(struct stats *s) {
    if(s) {
        free(s);
    }
}

的main.c

#include <stdio.h>
#include <string.h>
#include "stats.h"

// A function void do_file(char *filename, char *key1, char*key2) that takes a file name and two keywords as its arguments. It should create a statistics struct that will be used to accumulate the statistics about the data in the file.

//Open the given filename, and iterate over each line of the file, reading in the data value and updating the statistics struct. Once the file has been read, close it and display the results.
void do_file(char *filename, char *key1, char *key2) {

    stats *newstats;
    newstats = stats_create(key1, key2);

    FILE *f = fopen(filename, "r");
    char *keyword1 = newstats->key1;
    char *keyword2 = newstats->key2;
    //char keyword1 = *key1;
    //char keyword2 = *key2;

    int line_num = 1;
    int find_result1 = 0;
    int find_result2 = 0;
    char temp[160];

    if((f = fopen(filename, "r")) == NULL) {
        printf("File does not exist!\n");
    }

    const char delimter[1] = " ";
    char onewordtoken[160];

    // Get a line, up to 160 chars from file
    while(fgets(temp, 160, f) != NULL) {
        if((strstr(temp, key1)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result1++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        if((strstr(temp, key2)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result2++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        line_num++;
    }

    stats_free(newstats);

    if((find_result1 == 0) && (find_result2 == 0)) {
        printf("\nSorry, couldn't find a match.\n");
    }

    // Close the file if still open
    if(f) {
        fclose(f);
    }
}

// A main function that iterates over each file argument, except the first, and runs do_file on it. After the 0th argument, the first two command line arguments will be the keywords to process. The remaining arguments will be the file arguments.

int main(int argc, char **argv) {

    if(argc > 2) {
        //printf("The argument supplied is %s\n", argv[1]);
        //stats_add_data();
        do_file("data.txt", argv[1], argv[2]);
        printf("\n\n");
        //stats_free(s);
    } else {
        printf("Too few arguments supplied.\n");
        //stats_free(s);
    }
}

1 个答案:

答案 0 :(得分:0)

编译时,始终启用所有警告,然后修复这些警告。

以下代码干净地编译并消除了子函数(它们在其余代码中成为一个衬里。)

函数:free()正确处理NULL指针,因此在调用free()

之前无需测试指针是否为NULL

发布的代码缺少stats.h的内容,因此我插入了原型和struct stats定义以及typedef语句

(非常糟糕的做法是使struct标记名和typedef名称相同)

检查最少数量的命令行参数应该向stderr输出usage消息,而不是stdout,并且应该指示正确的命令行参数格式。

修改了非常长的评论以适应页面,包括次要内容更改。

以下是建议的代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
//#include <stdio.h>
//#include <string.h>
//#include "stats.h"
#include <errno.h>

#define MAX_LINE_LEN (160)

struct stats
{
    char *key1;
    char *key2;
    int int1;
    int int2;
};

typedef struct stats stats;


// prototypes
stats *stats_create(char *key1, char *key2);
#if 0
    void stats_add_data(struct stats *s, char *val);
#endif
void stats_print(struct stats *s);
#if 0
    void stats_free(struct stats *s);
#endif
void do_file(char *filename, char *key1, char *key2);


// the following comment is not correct:

/*
 * A main function that iterates over each file argument, 
 * except the first, and runs do_file on it. 
 * After the 0th argument, 
 * the first two command line arguments will be the 
 * keywords to process. 
 *The remaining arguments will be the file arguments.
 */

int main(int argc, char **argv)
{
    //if(argc > 2)
    if( argc > 3 )
    {
        //printf("The argument supplied is %s\n", argv[1]);
        //stats_add_data();
        for( int i=3; i<argc; i++ )
        {
            do_file( argv[i], argv[1], argv[2] );
        }
        //do_file("data.txt", argv[1], argv[2]);
        printf("\n\n");
        //stats_free(s);
    }

    else
    {
        printf("Too few arguments supplied.\n");
        //stats_free(s);
    }
} // end function: main



/*
 * The stats_create function should allocate 
 * and return a new stats structure, 
 * storing key1 and key2 in the struct 
 * as pointers to string fields and setting the two integers to 0.
 */

stats *stats_create(char *key1, char *key2)
{
    stats *new = malloc(sizeof *new);
    new->key1 = key1;
    new->key2 = key2;
    new->int1 = 0;
    new->int2 = 0;
    return new;
} // end function: stats_create


#if 0
    // The stats_add_data function should update the structure pointed to by s against the value val. That is, if val contains the first keyword, the integer containing the count of the number of messages containing that keyword should be updated. Similarly for the second keyword.
    void stats_add_data(struct stats *s, char *val)
    {
        //int count1 = 0;
        //int count2 = 0;
        // char* key1 = s->key1;
        // char* key2 = s->key2;

        if (strcmp(s->key1, val) == 0)
        {
            //count1++;
            s->int1++;
        }

        if (strcmp(s->key2, val) == 0)
        {
            //count2++;
            s->int2++;
        }

        //count1 = s->int1;
        //count2 = s->int2;
    } // end function: stats_add_data
#endif

/*
 * The stats_print function should print on one line
 * the number of messages containing the first keyword, 
 * and the number of messages containing the second keyword. 
 * E.g. 2 messages containing Homeland and 2 messages containing Elementary.
 */

void stats_print(struct stats *s)
{
    //int count1 = s->int1;
    //int count2 = s->int2;
    //char* key1 = s->key1;
    //char* key2 = s->key2;

    //printf("%d", count1);
    //printf(" messages containing ");
    //printf("%s", key1);
    //printf(" and ");
    //printf("%d", count2);
    //printf(" messages containing ");
    //printf("%s", key2);
    printf( "%d messages containing <%s> and %d messages containing <%s>\n",
            s->int1, s->key1, s->int2, s->key2 );
} // end function: stats_print


#if 0
    // The stats_free function should free the given stats structure and any memory that might have been allocated for it.
    void stats_free(struct stats *s)
    {
        if(s) {
            free(s);
        }
    } // end function: stats_free
#endif

/*
 * A function void do_file(char *filename, char *key1, char*key2)
 * that takes a file name and two keywords as its arguments. 
 * It should create a statistics struct 
 * that will be used to accumulate the statistics
 * about the data in the individual file.
 */

/*
 * Open the given filename, 
 * iterate over each line of the file, 
 * searching for the two key words in the line
 * updating the statistics struct for each line read. 
 * after the whole file has been read, 
 * close it and display the results.
 */

void do_file(char *filename, char *key1, char *key2)
{

    stats *newstats = NULL;
    newstats = stats_create(key1, key2);

    // FILE *f = fopen(filename, "r");
    // char *keyword1 = newstats->key1;
    // char *keyword2 = newstats->key2;
    // char keyword1 = *key1;
    // char keyword2 = *key2;

    int line_num = 1;
    // int find_result1 = 0;
    // int find_result2 = 0;
    char temp[ MAX_LINE_LEN+1]; //+1 to allow for NUL terminator byte

    FILE *f = NULL;
    if((f = fopen(filename, "r")) == NULL)
    {
        fprintf( stderr, "fopen for %s failed due to: %s\n", filename, strerror( errno ) );
        // exit( EXIT_FAILURE );
        return;
        //printf("File does not exist!\n");
    }

    // implied else, fopen successful

    // const char delimter[1] = " ";
    // char onewordtoken[160];

    // Get a line, up to 160 chars from file
    while(fgets(temp, sizeof temp, f) != NULL)
    {
        if((strstr(temp, key1)) != NULL)
        {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            //find_result1++;

            //stats_add_data(newstats, temp);
            newstats->int1++;

            stats_print(newstats);
            printf("\n\n");
        }

        if((strstr(temp, key2)) != NULL)
        {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            //find_result2++;

            //stats_add_data(newstats, temp);
            newstats->int2++;

            //stats_print(newstats);
            //printf("\n\n");
        }

        line_num++;
    } // end while

    stats_print( newstats );
    printf("\n\n");

    // stats_free(newstats);

    //if((find_result1 == 0) && (find_result2 == 0)) {
    if( 0 == newstats->int1 && 0 == newstats->int2 )
    {
        printf("\nSorry, couldn't find a match.\n");
    }

    free( newstats );

    // Close the file if still open
    //if(f) {
    //    fclose(f);
    //}
    // would not get here if file did not open
    fclose( f );
} // end function: do_file