单词计数器统计出的词频有时是错误的

时间:2016-09-22 20:13:22

标签: c string count

我希望你能帮我解决这个问题。代码就像这样

  • 用户输入一个字符串,例如 "hey john, how are you john?"
  • 程序会删除 '?'、'.'、','、'!' 等标点符号。
  • 程序在删除标点符号后输出处理过的字符串:"hey john how are you john"

  • 并且代码输出每个单词的频率:

hey : 1 john : 2 how : 1 are : 1 you : 1

但我的代码有时会错误。例如,当我输入“bye bye bye hello hello hello”

输出是: bye : 3 hello : 1

我的代码能正确处理 john 的示例,但 bye bye … 示例的结果是错误的。

如何更改代码?谢谢

#include <stdio.h>
#include <string.h>


char words[80][80];

void clear_string(char *text);
int extract_and_count(char *source,  int *count);
void clearArray(char array[]);
int indexInWords(char string[]);
void print(int countOfWords, int count[]);
int equals(char *string1, char *string2);


/*
 * Entry point: reads one line of text, strips punctuation from it, echoes the
 * cleaned text, then counts the words and prints each word with its count.
 *
 * Returns 0 unconditionally.
 */
int main() {
    char string[80];
    /* count[i] is the tally for words[i]; entries are set by extract_and_count(). */
    int count[80];

    printf("please enter your text: ");

    /* NOTE(review): "%[^\n]" carries no field width, so a line longer than 79
     * characters is written past the end of string[80]. The trailing 's' in
     * the format is a literal to match, not part of the conversion. The
     * return value is not checked, so string stays unset when nothing matches
     * (for example an empty line). */
    scanf("%[^\n]s", string);

    clear_string(string);

    /* Echo the text as it looks after punctuation removal. */
    printf("%s\n", string);
    int countOfWords = extract_and_count(string, count);

    print(countOfWords, count);

    return 0;
}

/*
 * Removes the characters '.', ',', '!' and '?' from text in place.
 *
 * Each removal shifts the remainder of the string one position to the left
 * and writes a ' ' into the last position, so the string keeps its length.
 * After a removal the scan moves on to the next index, so the character that
 * was shifted into the current position is not examined.
 *
 * text: NUL-terminated, writable string.
 */
void clear_string(char *text){
    size_t pos = 0;

    while (pos < strlen(text)) {
        const char current = text[pos];
        const int isSign = (current == '.') || (current == ',')
                        || (current == '!') || (current == '?');

        if (isSign) {
            const size_t length = strlen(text);

            /* Close the gap left by the removed character ... */
            memmove(text + pos, text + pos + 1, length - pos - 1);
            /* ... and pad the end so the overall length is unchanged. */
            text[length - 1] = ' ';
        }

        pos++;
    }
}

/*
 * Walks source character by character, collects the characters between
 * spaces into a local buffer and records each collected word in the global
 * words[] table, keeping its tally in count[].
 *
 * source: NUL-terminated text to scan.
 * count:  array receiving the tallies; count[i] belongs to words[i].
 *
 * Returns the number of distinct words stored in words[].
 *
 * NOTE(review): a word is only recorded when a ' ' is reached, so a final
 * word that is not followed by a space is never counted. Neither k nor
 * wordCounter is checked against the array sizes.
 */
int extract_and_count(char *source,  int *count){

    int wordCounter = 0;
    char string[80];
    int i = 0, k = 0;
    /* NOTE(review): string has not been initialised at this point, and
     * clearArray() sizes its memset with strlen(), so this call reads
     * indeterminate bytes. */
    clearArray(string);

    for(; i < strlen(source);++i, ++k){

        if(source[i] != ' '){
                /* Still inside a word: append the character to the buffer. */
                string[k] = source[i];
        }else{
            /* A space with an empty buffer (leading or repeated space) ends
             * the whole scan, not just the current word. */
            if(string[0] == '\0'){
                break;
            }
            /* indexInWords() yields -1 when the word is not in the table yet. */
            int index = indexInWords(string);

            if(index == -1){

                strcpy(words[wordCounter], string);
                count[wordCounter] = 1;
                wordCounter++;
            }else{
                count[index] += 1;
            }

            clearArray(string);
            /* The loop's ++k brings k back to 0 for the next word. */
            k = -1;


        }

    }

    return wordCounter;
}

/*
 * Zeroes every character of array up to (not including) its first NUL, so
 * the buffer reads as an empty string afterwards. Bytes behind the first NUL
 * are left untouched.
 *
 * array: NUL-terminated, writable buffer.
 */
void clearArray(char array[]){
    char *end = array + strlen(array);

    /* Walk back from the terminator to the start, clearing as we go. */
    while (end != array) {
        *--end = '\0';
    }
}

/*
 * Looks string up in the global words[] table.
 *
 * All 80 rows are compared in ascending order with equals(), which reports a
 * match by returning 0.
 *
 * Returns the index of the first matching row, or -1 when no row matches.
 */
int indexInWords(char string[]){
    int found = -1;
    int row = 0;

    while (row < 80 && found == -1) {
        if (equals(words[row], string) == 0) {
            found = row;
        }
        row++;
    }

    return found;
}

/*
 * Prints the first countOfWords entries of the global words[] table, one per
 * line, in the form "<word> : <count>".
 *
 * countOfWords: number of rows to print.
 * count:        tallies; count[i] is printed next to words[i].
 */
void print(int countOfWords, int count[]){
    int row = 0;
    int remaining = countOfWords;

    while (remaining > 0) {
        printf("%s : %d\n", words[row], count[row]);
        row++;
        remaining--;
    }
}

/*
 * Compares two NUL-terminated strings with strcmp().
 *
 * Returns strcmp()'s result unchanged: 0 when the strings are equal, a
 * negative or positive value otherwise.
 */
int equals(char string1[], char string2[]){
    const int order = strcmp(string1, string2);

    return order;
}

2 个答案:

答案 0 :(得分:1)

我发现的最重要的问题出在 extract_and_count() 中——它不会统计最后一个单词,因为它只统计后面跟着空格的单词。一个权宜之计是在循环结束后检查 string 是否还包含内容,如果有,就对其进行处理。下面是我针对该问题的修复,以及对整体代码风格所做的修改:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

void clear_string(char *text);
int extract_and_count(char *source, int count[]);
void clearArray(char array[]);
int indexInWords(char string[]);
void print(int countOfWords, int count[]);
bool equals(char *string1, char *string2);

#define BUFFER_SIZE (512)
#define MAX_WORD_COUNT (80)
#define MAX_WORD_SIZE (64)

char words[MAX_WORD_COUNT][MAX_WORD_SIZE];

/*
 * Entry point: prompts for one line of text, removes punctuation from it and
 * prints how many times each space-separated word occurs.
 *
 * Returns 0 on success, 1 when no input line could be read.
 */
int main() {
    char string[BUFFER_SIZE];
    int count[MAX_WORD_COUNT] = {0};

    printf("Please enter your text: ");

    /* fgets() returns NULL only on end-of-file or a read error. The stream's
     * indicator stays set afterwards, so asking again would fail immediately
     * and loop forever; report the problem and stop instead. */
    if (fgets(string, sizeof string, stdin) == NULL) {
        fputs("No input could be read.\n", stderr);
        return 1;
    }

    /* Strips punctuation and the newline that fgets() keeps. */
    clear_string(string);

    int countOfWords = extract_and_count(string, count);

    print(countOfWords, count);

    return 0;
}

/*
 * Removes every '.', ',', '!', '?' and '\n' from text in place.
 *
 * The string is compacted in a single pass: kept characters are copied down
 * to the write position, so the result is shorter by the number of removed
 * characters and remains NUL-terminated.
 *
 * text: NUL-terminated, writable string.
 */
void clear_string(char *text) {
    char *out = text;

    for (const char *in = text; *in != '\0'; in++) {
        switch (*in) {
        case '.':
        case ',':
        case '!':
        case '?':
        case '\n':
            /* Dropped: the write position does not advance. */
            break;
        default:
            *out++ = *in;
            break;
        }
    }

    *out = '\0';
}

/*
 * Splits source on spaces and tallies every word in the global words[] table.
 *
 * The scan runs up to and including the terminating NUL, so the last word is
 * handled by the same code path as every other word. Leading, trailing and
 * repeated spaces produce empty words, which are skipped.
 *
 * source: NUL-terminated text to scan.
 * count:  array receiving the tallies; count[i] belongs to words[i]. It must
 *         have room for as many entries as words[] has rows.
 *
 * Returns the number of distinct words stored in words[].
 *
 * Limits: a word longer than MAX_WORD_SIZE - 1 characters is truncated to
 * that length; once every row of words[] is in use, further new words are
 * ignored (known words are still counted).
 */
int extract_and_count(char *source, int count[]) {

    const int capacity = (int)(sizeof words / sizeof words[0]);
    int wordCounter = 0;
    char string[MAX_WORD_SIZE];
    size_t k = 0; /* number of characters collected for the current word */

    for (size_t i = 0; ; i++) {
        const char c = source[i];

        if (c != ' ' && c != '\0') {
            /* Keep one slot free for the terminator. */
            if (k < sizeof string - 1) {
                string[k++] = c;
            }
            continue;
        }

        /* Separator or end of text: flush the collected word, if any. */
        if (k > 0) {
            string[k] = '\0';

            int index = indexInWords(string);

            if (index != -1) {
                count[index] += 1;
            } else if (wordCounter < capacity) {
                strcpy(words[wordCounter], string);
                count[wordCounter] = 1;
                wordCounter++;
            }

            k = 0;
        }

        if (c == '\0') {
            break;
        }
    }

    return wordCounter;
}

/*
 * Overwrites the string held in array with NUL characters, from its first
 * character up to its terminator. Bytes behind the terminator are not
 * touched.
 *
 * array: NUL-terminated, writable buffer.
 */
void clearArray(char array[]) {
    for (char *cursor = array; *cursor != '\0'; cursor++) {
        *cursor = '\0';
    }
}

/*
 * Looks string up in the global words[] table.
 *
 * words[] has static storage, so rows that were never written are empty
 * strings. The search stops at the first empty row instead of comparing
 * against all MAX_WORD_COUNT rows, and an empty string is never reported as
 * found (it would otherwise "match" the first unused row).
 * Assumes rows are filled contiguously from index 0.
 *
 * string: NUL-terminated word to look for.
 *
 * Returns the index of the matching row, or -1 when the word is not stored.
 */
int indexInWords(char string[]) {

    if (string[0] == '\0') {
        return -1;
    }

    for (int i = 0; i < MAX_WORD_COUNT && words[i][0] != '\0'; i++) {
        if (equals(words[i], string)) {
            return i;
        }
    }

    return -1;
}

/*
 * Writes the first countOfWords rows of the global words[] table to stdout,
 * one "<word> : <count>" pair per line.
 *
 * countOfWords: number of rows to print.
 * count:        tallies; count[i] is printed next to words[i].
 */
void print(int countOfWords, int count[]) {
    int row = 0;

    while (row < countOfWords) {
        printf("%s : %d\n", words[row], count[row]);
        row++;
    }
}

/*
 * Tells whether two NUL-terminated strings have identical contents.
 *
 * Returns true when strcmp() reports no difference, false otherwise.
 */
bool equals(char string1[], char string2[]) {
    const int difference = strcmp(string1, string2);

    return !difference;
}

我看到的下一个最重要的问题是,您没有跟踪 words[][] 中已经使用了多少个条目,因此 indexInWords() 很容易越过已使用的条目,去和尚未初始化的内存进行比较。

答案 1 :(得分:0)

在 extract_and_count 中,当遇到连续两个空格时,你会直接跳出 for 循环。另外,你也没有处理 source 中的最后一个单词。请将其改为:

/*
 * Splits source on spaces and tallies every word in the global words[] table.
 *
 * The terminating NUL is treated like a separator, so the last word is
 * counted too; empty words caused by leading, trailing or repeated spaces are
 * skipped. The word buffer is terminated explicitly before every lookup, so
 * it never has to be cleared and is never read while uninitialised.
 *
 * source: NUL-terminated text to scan.
 * count:  array receiving the tallies; count[i] belongs to words[i]. It must
 *         have room for as many entries as words[] has rows.
 *
 * Returns the number of distinct words stored in words[].
 *
 * Limits: a word longer than 79 characters is truncated to that length; once
 * every row of words[] is in use, further new words are ignored.
 */
int extract_and_count(char *source,  int *count){

    const int maxWords = (int)(sizeof words / sizeof words[0]);
    char string[80];
    int wordCounter = 0;
    size_t k = 0; /* characters collected for the current word */
    size_t i = 0;

    do {
        if(source[i] != ' ' && source[i] != '\0'){
            /* Keep one slot free for the terminator. */
            if(k < sizeof string - 1){
                string[k++] = source[i];
            }
        }else if(k > 0){
            string[k] = '\0';

            int index = indexInWords(string);

            if(index != -1){
                count[index] += 1;
            }else if(wordCounter < maxWords){
                strcpy(words[wordCounter], string);
                count[wordCounter] = 1;
                wordCounter++;
            }

            k = 0;
        }
    } while(source[i++] != '\0'); /* stop after the terminator was processed */

    return wordCounter;
}