从文本(字符数组)中提取单词并将它们放在2维数组中的函数

时间:2016-09-30 12:03:17

标签: c arrays

我正在学习C,遇到了一些困难。我必须编写一个程序,它接收一段文本(最多80个字符),把文本中的单词放入 char words[80][80](每个单词在这个数组中只能出现一次!该数组被定义为全局变量),并把每个单词在文本中出现的次数放入 int count[](索引必须与 words[][] 中的相同)。该函数的原型为 int extract_and_count(char *source, int *count)。我写了一些代码,但不确定如何实现这个功能。有人能帮我吗?另外,我是 Stack Overflow 的新手,如果我犯了什么错误,请见谅。

这是我目前写的代码,但还没有完成:

 /*
  * Unfinished attempt from the question.
  *
  * Splits `source` in place on spaces with strtok(), copies every token into
  * the local `token` table and prints it, then runs a draft counting pass.
  * `count` is never written and the function always returns 1.
  */
 int extract_and_count(char *source,int *count){
  char token[80][80];
  char *p;
  int i = 0;
  p = strtok(source, " ");
  /* Tokenizing pass: one row of `token` per space-separated word. */
  while( p != NULL ){
    strcpy(token[i],p);
    printf("%s\n",*(token+i));
    i++;
    p = strtok(NULL , " ");
  }
  /* NOTE(review): `word` is a single char, yet below it is assigned a row of
     `token` (a char array) and passed to strcmp(); holding a string would
     require a `char *`. */
  char word;
  int value = 0, j;
  /* NOTE(review): both loops walk all 80 rows, including rows the tokenizing
     pass above never filled. */
  for(i = 0 ; i < 80 ; i++){
    word = token[i];
    for(j = 0 ; j < 80 ; j++){
      /* NOTE(review): compares against token[i], not token[j], so `j` does
         not take part in the comparison. */
      if(strcmp(word,token[i])==0){
    value++;
      }
    }

  }
  /* `value` is accumulated above but never stored or returned. */
  return 1;
}

3 个答案:

答案 0 :(得分:0)

 while( p != NULL ){
    strcpy(token[i],p);
    printf("%s\n",*(token+i));
    i++;
    p = strtok(NULL , " ");   --> here you are just splitting the words
  }

现在,token 数组中包含了拆分出来的所有单词,但这并不满足你的要求:“每个单词只出现一次”。你可以把尚未出现过的单词逐一比较后复制到另一个数组中,并且在同一个循环里完成计数、更新计数数组。

注意:不应该让所有单词共用一个计数器变量,而应该使用计数数组分别统计每个单词的出现次数。

答案 1 :(得分:0)

您需要检查是否已找到某个字词。如果是这样,只需增加全局计数器。否则,将新单词复制到全局字符串数组。

类似的东西:

#include <stdio.h>
#include <string.h>

// Global result tables: word[k] is the k-th distinct word found in the most
// recently scanned text and count[k] is the number of times it occurred.
char word[80][81];
int  count[80] = { 0 };

/*
 * Split `source` on spaces and record every distinct word and its number of
 * occurrences in the global `word` / `count` tables.
 *
 * source       Writable, NUL-terminated text; strtok() overwrites the
 *              separators in place.
 * strings_cnt  Receives the number of distinct words stored.
 *
 * Returns 1 when the whole text was processed. Returns 0 when an argument is
 * NULL, when a word does not fit into one row of `word`, or when more
 * distinct words are found than `word` has rows; in the last two cases the
 * entries recorded so far remain valid and *strings_cnt reflects them.
 */
int extract_and_count(char *source,int *strings_cnt){
  const int max_words = (int)(sizeof word / sizeof word[0]);
  const size_t max_len = sizeof word[0] - 1;

  if (source == NULL || strings_cnt == NULL) {
    return 0;
  }

  *strings_cnt = 0;

  // Each token is matched against the result table directly; no intermediate
  // copy of the token list is needed.
  for (char *p = strtok(source, " "); p != NULL; p = strtok(NULL, " ")) {
    int k = 0;
    while (k < *strings_cnt && strcmp(word[k], p) != 0) {
      k++;
    }

    if (k < *strings_cnt) {
      // The word already exists - increment count
      count[k]++;
      continue;
    }

    // New word: refuse it rather than write past the end of the tables.
    if (*strings_cnt >= max_words || strlen(p) > max_len) {
      return 0;
    }

    strcpy(word[k], p);   // bounded by the length check above
    count[k] = 1;
    (*strings_cnt)++;
  }

  return 1;
}

/*
 * Demo driver: scans a fixed sentence with extract_and_count() and prints
 * every distinct word together with its occurrence count, read back from the
 * global `count` / `word` tables.
 */
int main(void)
{
  char text[] = "c language is difficult c is also fun";
  int distinct;

  printf("Searching: %s\n", text);

  extract_and_count(text, &distinct);

  printf("Found %d different words\n", distinct);

  int idx = 0;
  while (idx < distinct)
  {
    printf("%d times: %s\n", count[idx], word[idx]);
    idx++;
  }

  return 0;
}

输出:

Searching: c language is difficult c is also fun
Found 6 different words
2 times: c
1 times: language
2 times: is
1 times: difficult
1 times: also
1 times: fun

上面我尝试按照你的代码风格,但我想添加这些评论:

1)你真的不需要token数组。可以更改第一个循环,以便直接更新最终结果。

2)不要使用全局变量

3)代码不能处理常见的分隔符,例如逗号(,)、句号(.)、冒号(:)等等

4)你应该将单词和计数放入结构中。

考虑到评论1,2和4,代码可以是:

#include <stdio.h>
#include <string.h>

// One entry of the result table: a distinct word and how often it occurred.
struct WordStat
{
  char word[81];
  int count;
};


/*
 * Split `source` on spaces and record every distinct word and its number of
 * occurrences in the caller-supplied table `ws`.
 *
 * source       Writable, NUL-terminated text; strtok() overwrites the
 *              separators in place.
 * strings_cnt  Receives the number of entries of `ws` that were filled.
 * ws           Result table with room for at least `max` entries.
 * max          Capacity of `ws`; no entry at or beyond this index is written.
 *
 * Returns 1 when the whole text was processed. Returns 0 when a pointer
 * argument is NULL, when a word does not fit into WordStat.word, or when the
 * text holds more than `max` distinct words; in the last two cases the
 * entries recorded so far remain valid and *strings_cnt reflects them.
 */
int extract_and_count(char *source,int *strings_cnt, struct WordStat* ws, int max){
  if (source == NULL || strings_cnt == NULL || ws == NULL) {
    return 0;
  }

  *strings_cnt = 0;

  for (char *p = strtok(source, " "); p != NULL; p = strtok(NULL, " ")) {
    // Check if the word is already detected once
    int k = 0;
    while (k < *strings_cnt && strcmp(ws[k].word, p) != 0) {
      k++;
    }

    if (k < *strings_cnt) {
      // The word already exists - increment count
      ws[k].count++;
      continue;
    }

    // New word: honour the caller's capacity and the fixed word buffer size
    // instead of writing past either of them.
    if (*strings_cnt >= max || strlen(p) >= sizeof ws[k].word) {
      return 0;
    }

    strcpy(ws[k].word, p);   // bounded by the length check above
    ws[k].count = 1;
    (*strings_cnt)++;
  }

  return 1;
}

#define MAX_WORDS 80

int main(void)
{
  struct WordStat ws[MAX_WORDS];
  char s[] = "c language is difficult c is also fun";
  int c, i;

  printf("Searching: %s\n", s);

  extract_and_count(s, &c, ws, MAX_WORDS);

  printf("Found %d different words\n", c);
  for (i=0; i<c; i++)
  {
    printf("%d times: %s\n", ws[i].count, ws[i].word);
  }
  return 0;
}

答案 2 :(得分:0)

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

// Number of rows in the words[] table.
#define NUM_OF_WORDS_MAX 80
// Longest word that fits in one row of words[], not counting the '\0'.
#define MAX_WORD_LENGTH  79
// Two-level stringification: S(x) expands the macro x first and S_() then
// turns the result into a string literal, so S(MAX_WORD_LENGTH) yields "79".
#define S_(x) #x
#define S(x) S_(x) // expand, then convert the numeric literal to a string

// Table of distinct words; a row whose first character is '\0' is unused.
char words[NUM_OF_WORDS_MAX][MAX_WORD_LENGTH+1];
// Number of rows of words[] currently occupied.
int Words_entry = 0;

/*
 * Map a word to its starting slot in the word table from its first letter,
 * ignoring case: 'a' -> 0, 'b' -> 3, ... 'z' -> 75.
 *
 * The factor 3 spreads the 26 letters over an 80-slot table (80 / 26).
 * A string that does not start with a letter a-z / A-Z (including the empty
 * string) maps to slot 0, so the result is always a valid index in [0, 75].
 */
static inline int hash(const char *str){
    // <ctype.h> functions need an unsigned char value; a negative plain char
    // would be undefined behaviour.
    int letter = tolower((unsigned char)*str) - 'a';

    if(letter < 0 || letter >= 26){
        return 0;
    }
    return letter * 3;
}

/*
 * Cut the next word (a maximal run of alphabetic characters) out of the
 * string *sp.
 *
 * The character that ends the word is overwritten with '\0', so the buffer
 * must be writable. On success *sp is advanced past the word (and past the
 * overwritten character, if any) and a pointer to the word is returned.
 * Returns NULL, leaving *sp unchanged, when no alphabetic character remains.
 */
char *extract(char **sp){
    char *p = *sp;

    // Skip everything that is not a letter. The casts keep the <ctype.h>
    // calls defined for bytes that are negative as plain char.
    while(*p && !isalpha((unsigned char)*p))
        ++p;
    if(!*p)
        return NULL;

    char *ret = p;//start of the word
    while(*p && isalpha((unsigned char)*p))
        ++p;

    if(*p){
        *p = '\0';//terminate the word in place
        ++p;
    }
    *sp = p;//rest of the text

    return ret;
}

int extract_and_count(char *source, int *count){
    char *sp = source;
    char *word;
    int word_count = 0;

    while(word = extract(&sp)){
        if(Words_entry == NUM_OF_WORDS_MAX){
            fprintf(stderr, "words table is full.\n");
            return word_count;
        }

        int index = hash(word);
        while(1){
            if(*words[index]){
                if(strcasecmp(words[index], word) == 0){//ignore case
                    ++count[index];
                    break;
                }
                if(++index == NUM_OF_WORDS_MAX){
                    index = 0;
                }
            } else {
                strcpy(words[index], word);
                count[index] = 1;
                ++Words_entry;
                break;
            }
        }
        ++word_count;
    }
    return word_count;
}

/*
 * Reads lines from standard input (at most MAX_WORD_LENGTH characters per
 * read) and feeds each one to extract_and_count(). Reading stops at the
 * first empty line or at end of input, after which every occupied slot of
 * the word table is printed with its count.
 */
int main(void){
    char line[MAX_WORD_LENGTH+1];
    int count[NUM_OF_WORDS_MAX] = {0};

    for(;;){
        // "%79[^\n]" fails to match on an empty line, so scanf() then
        // returns something other than 1 and the loop ends.
        int got = scanf("%" S(MAX_WORD_LENGTH) "[^\n]%*c", line);
        if(got != 1){
            break;
        }
        extract_and_count(line, count);
    }

    // Report: unused slots hold an empty string and are skipped.
    int slot = 0;
    while(slot < NUM_OF_WORDS_MAX){
        if(words[slot][0] != '\0'){
            printf("%s : %d\n", words[slot], count[slot]);
        }
        ++slot;
    }
    return 0;
}