Question

我正在尝试完成我的一个C作业问题。这是定义和示例IO：

描述

给出一篇文章作为输入。您必须计算每个单词的数量，并按字母顺序打印单词列表。

示例输入

这是最好的时期，这是最糟糕的时期，它是时代的智慧，这是愚蠢的时代，它是信仰的时代，它是一个令人怀疑的时代，它是光明的季节，它是黑暗的季节，它是希望的春天，它是冬天的绝望，我们面前有一切，我们面前没有任何东西，我们所有人都直接走向天堂，我们都直接走向另一个方式。

示例输出

age 2
all 2
before 2
belief 1
best 1
darkness 1
despair 1
direct 2
epoch 2
everything 1
foolishness 1
going 2
had 2
heaven 1
hope 1
incredulity 1
it 10
light 1
nothing 1
of 10
other 1
season 2
spring 1
the 11
times 2
to 1
us 2
was 10
way 1
we 4
were 2
winter 1
wisdom 1
worst 1

现在这是我的代码：

在main.c中：

#include <stdio.h>
#include <stdlib.h>
#include "function.h"
#include <string.h>

int main()
{
    char wordcollected [3100] = {0};
    char *word_ptr[100];
    int countarray[100];
    static char temp[31];
    int nth_word = 0;
    while(1){
        int n = strlen(wordcollected);
        word_ptr [nth_word] = wordcollected + strlen(wordcollected);
        if(strcpy(temp, fetch_word()) == NULL){
            for(n == strlen(wordcollected); n >= 0; n--){
            if(wordcollected[n] == ','){
                wordcollected[n] = '\0';
            }
        }
            break;
        }
        strcat((wordcollected), temp);
        strcat((wordcollected), ",");
        nth_word ++;
    }

}

我们的TA已经为我们完成了部分代码：

在function.c中：

#include "function.h"
#include <stdio.h>

// fetch words from stdin
const char *fetch_word(){
    static char skip_symbol[] = " \t\n,.;:?()[]{}\"\"''" ;

    static char line_buffer[1024] ;
    static char *now = NULL ;

    // try to read a line from stdin
    for( ;; ){
        if( now == NULL)
            now = fgets( line_buffer, sizeof(line_buffer), stdin ) ;

        // End Of File?
        if( now == NULL )
            return NULL ;

        // skip symbols
        for( ; *now ; now++ ){
            int size = sizeof( skip_symbol ) ;
            int i ;
            for( i=0 ; i<size ; i++ ){
                if( *now == skip_symbol[i] )
                    break ;
            }

            // if not match skip_symbol[]
            if( i >= size )
                break ;
        }

        // End Of Line?
        if( *now == '\0' ){
            now = NULL ;
            continue ;
        }

        char *word = now ;

        for( ; *now ; now++ ){
            int size = sizeof( skip_symbol ) ;
            int i ;
            for( i=0 ; i<size ; i++ ){
                if( *now == skip_symbol[i] )
                    break ;
            }

            // if match skip_symbol[]
            if( i < size )
                break ;
        }

        if( *now ){
            *now = '\0' ;
            now ++ ;
        }

        return word ;
    }

    return NULL ;
}

在function.h中：

#ifndef __FUNCTION_H__
#define __FUNCTION_H__

// fetch words from stdin
const char *fetch_word() ;

#endif

函数* fetch_word（）将在运行时返回指向stdin中每个单词的指针，如果函数已经到达End-Of-File，它也将返回NULL。但每当它达到EOF时，它就会继续说分段故障并且系统停止运行。如何检测fetch_word（）的返回值，知道何时到达文件结尾，还防止丢失任何单词？

Answer 1

在进行循环之前，您需要在结束标志处打破：

    if(strcpy(temp, fetch_word()) == NULL){
        break;
        for(n == strlen(wordcollected); n >= 0; n--){
        if(wordcollected[n] == ','){
            wordcollected[n] = '\0';
        }
    }
    }

但请接受我的建议并重构您的代码并使其更具可读性。你会节省很多时间。

另外，在算法上，对我来说，似乎你可能想要创建一个单词的链接列表，与一个数字配对（实现链接列表数据结构以实现这一点），每当你读到一个单词时，试着找到它链接列表，直到您按字母顺序或列表末尾或匹配方式达到低于单词的内容。如果找到匹配项，请在数字中添加1。否则，在相应的位置插入带有1作为值的单词。

Answer 2

对于提取单词，您可以使用以下fetch_words（）。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void print_word(const char *word, void *arg)
{
    printf("[Word] <%s>\n", word);
}

/*
 * @fp   file pointer
 * @func recall function whch has two params (word, arg)
 * @arg  the second param of func
 * */
void fetch_words(FILE *fp, void (*func)(const char *, void *), void *arg)
{
    char area[256] = {0};
    char buff[128];
    int size = sizeof(buff) - 1;
    char *delim = " \t\n,.;:?(){}\"'";
    char *last;
    char *token;
    int len;
    char *pos;

    while (fgets(buff, size, fp) != NULL)
    {
        /* Append part of buff into area */
        len = size - strlen(area);
        strncat(area, buff, len);
        pos = area + strlen(area);

        /* Split string in area */
        if ((token = strtok(area, delim)) != NULL)
        {
            last = token;
            while ((token = strtok(NULL, delim)) != NULL)
            {
                func(last, arg);
                last = token;
            }

            if (last + strlen(last) == pos)
            {
                /* Copy last token into area  */
                strcpy(area, last);
            }
            else
            {
                /* Clean area */
                area[0] = 0;
                func(last, arg);
                last = NULL;
            }
        }

        /* Append left part of buff into area for next loop */
        if (len < strlen(buff))
        {
            strcat(area, buff + len);
        }
    }

    if (last)
    {
        func(last, arg);
    }

}

int main(int argc, char *argv)
{
    fetch_words(stdin, print_word, NULL);

    return 0;
}

对于字数统计，您可以使用hashmap（键，值）。键是单词，值是单词的数量。

这是C中hashmap的一个简单实现： https://github.com/foreverpersist/hashmap

字数问题

2 个答案: