如何在C中检查多个单词是否由完全相同的字符组成?

时间:2019-01-17 16:06:47

标签: c string

我遇到以下问题:我需要不断读入单词直到EOF,然后把由完全相同的字符组成的单词分到同一组并输出(每个字符出现的次数不必相同)。

例如:

输入:

"abc" "acb" "abcabc" "cab" "de" "gh" "ab" "ed" "hg" "abcde"

输出:

"abc" "acb" "abcabc" "cab"
"de" "ed"
"gh" "hg"

下面是我到目前为止写出的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads whitespace-separated words from stdin until EOF (or a read error),
 * keeps a heap-allocated copy of each one, and echoes them all on one line.
 *
 * Words longer than 29 characters are split into 29-character pieces because
 * the input buffer holds 30 bytes. Exits with status 1 if an allocation fails.
 */
int main(void)
{
    char word[30];
    char **store = NULL;
    size_t n = 0; /* word count; a plain char counter would overflow after 127 words */

    for (;;) {
        printf("Enter word: ");
        /* The field width keeps scanf from writing past word[29]. */
        if (scanf("%29s", word) != 1) break;

        /* Grow through a temporary so the old array is not lost on failure. */
        char **grown = realloc(store, (n + 1) * sizeof *store);
        if (grown == NULL) exit(1);
        store = grown;

        store[n] = malloc(strlen(word) + 1);
        if (store[n] == NULL) exit(1);
        strcpy(store[n], word);
        n++;
    }
    for (size_t i = 0; i < n; i++) {
        printf("%s ", store[i]);
    }

    for (size_t i = 0; i < n; i++) {
        free(store[i]);
    }
    free(store);
    return 0;
}

问题是我真的不知道如何检查字符。你能帮我吗?

更新

我尝试按照@jarmod的建议进行操作:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* qsort comparator: orders two chars ascending. */
static int compare_chars(const void *lhs, const void *rhs)
{
    char a = *(const char *)lhs;
    char b = *(const char *)rhs;
    return (a > b) - (a < b);
}

/*
 * Collapses every run of equal adjacent characters in s to a single
 * character, in place, and re-terminates the shortened string.
 */
static void squeeze_adjacent(char *s)
{
    size_t out = 0;
    for (size_t in = 0; s[in] != '\0'; in++) {
        if (out == 0 || s[in] != s[out - 1]) {
            s[out++] = s[in];
        }
    }
    s[out] = '\0';
}

/* Prints the n words on one line, each followed by a space, then a newline. */
static void print_words(char *const *words, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        printf("%s ", words[i]);
    }
    printf("\n");
}

/*
 * Reads words from stdin until EOF, then prints three lines:
 *   1. the words as entered,
 *   2. the words with their characters sorted,
 *   3. the sorted words with duplicate characters removed.
 * After step 3, two words built from the same set of characters compare
 * equal with strcmp().
 *
 * Words longer than 29 characters are split into 29-character pieces.
 * Exits with status 1 if an allocation fails.
 */
int main(void)
{
    char word[30];
    char **store = NULL;
    size_t n = 0; /* word count; a plain char counter would overflow after 127 words */

    for (;;) {
        printf("Enter word: ");
        /* The field width keeps scanf from writing past word[29]. */
        if (scanf("%29s", word) != 1) break;

        char **grown = realloc(store, (n + 1) * sizeof *store);
        if (grown == NULL) exit(1);
        store = grown;

        store[n] = malloc(strlen(word) + 1);
        if (store[n] == NULL) exit(1);
        strcpy(store[n], word);
        n++;
    }
    print_words(store, n);

    for (size_t i = 0; i < n; i++) {
        qsort(store[i], strlen(store[i]), sizeof store[i][0], compare_chars);
    }
    print_words(store, n);

    /* Duplicates are adjacent after sorting, so one linear pass removes them.
     * The buffers are left at their original size; shrinking them is not needed. */
    for (size_t i = 0; i < n; i++) {
        squeeze_adjacent(store[i]);
    }
    print_words(store, n);

    for (size_t i = 0; i < n; i++) {
        free(store[i]);
    }
    free(store);
    return 0;
}

3 个答案:

答案 0 :(得分:0)

我的方法是为每个单词使用26位掩码,以便您可以按掩码值对它们进行分组。

我已经实现了这个思路,不过实际的分组和显示部分尚未优化。它确实解决了您的问题:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One input word with its letter-set mask and its position in the input. */
struct masked_word {
  char *text;
  unsigned long mask; /* bit k is set when the letter 'a' + k occurs in text */
  size_t order;       /* input position; tie-breaker so equal masks keep input order */
};

/* qsort comparator: ascending mask, then ascending input position. */
static int by_mask_then_order(const void *lhs, const void *rhs) {
  const struct masked_word *a = lhs;
  const struct masked_word *b = rhs;

  if (a->mask != b->mask)
    return a->mask < b->mask ? -1 : 1;
  return (a->order > b->order) - (a->order < b->order);
}

/*
 * Reads words from stdin until EOF, computes a 26-bit letter mask for each,
 * and prints the words grouped by mask: one line per distinct mask, in
 * ascending mask order, words within a line in input order.
 *
 * Characters outside 'a'..'z' do not contribute to the mask (shifting by
 * their offset would be undefined). Words longer than 29 characters are
 * split into 29-character pieces. Returns 1 if an allocation fails.
 */
int main(void) {
  struct masked_word *words = NULL;
  size_t n_words = 0;
  char word[30];

  /* The field width keeps scanf from writing past word[29]. */
  while (scanf("%29s", word) == 1) {
    struct masked_word *grown = realloc(words, sizeof *words * (n_words + 1));
    if (grown == NULL)
      return 1;
    words = grown;

    size_t len = strlen(word);
    char *copy = malloc(len + 1);
    if (copy == NULL)
      return 1;
    memcpy(copy, word, len + 1);

    unsigned long mask = 0;
    for (size_t j = 0; j < len; ++j) {
      if (word[j] >= 'a' && word[j] <= 'z')
        mask |= 1UL << (word[j] - 'a');
    }

    words[n_words].text = copy;
    words[n_words].mask = mask;
    words[n_words].order = n_words;
    n_words++;
  }

  /* Sorting puts equal masks next to each other, so a single pass can print
   * the groups instead of scanning every possible mask value. */
  if (n_words > 0)
    qsort(words, n_words, sizeof *words, by_mask_then_order);

  for (size_t i = 0; i < n_words; ++i) {
    printf("%s ", words[i].text);
    if (i + 1 == n_words || words[i + 1].mask != words[i].mask)
      printf("\n");
  }

  for (size_t i = 0; i < n_words; ++i)
    free(words[i].text);
  free(words);
  return 0;
}

答案 1 :(得分:0)

有一个聪明的方法来确定一个单词属于哪个组。

我们必须设计一种方案,用一个数字唯一标识每个组。组是如何定义和区分的:通过它所包含的字母。重复的字母不计算在内(它是一个集合)。因此,我们需要一个公式来对一个字母集合进行编码。如果我们为每个字母分配一个编号(从0开始),就可以通过2的幂之和来得到id(实际上可以选择任何基数,但2是计算机上最自然的选择)。

例如:

"abdeae"字母集合为{'a', 'b', 'd', 'e'}。它们对应的编号为:{0, 1, 3, 4},id为2^0 + 2^1 + 2^3 + 2^4

因为有26个字母,所以我们可以使用32位整数来编码ID。 2^i对应于位i,因此算法看起来像这样:

/*
 * Returns the single-bit mask for a lowercase letter:
 * bit 0 for 'a', bit 1 for 'b', ..., bit 25 for 'z'.
 * The argument is checked with assert() to lie in 'a'..'z'.
 */
uint32_t letter_mask(char ch)
{
    assert(ch >= 'a' && ch <= 'z');

    const int bit = ch - 'a';
    const uint32_t one = 1u;

    return one << bit;
}

/*
 * Computes the group id of a word: the bitwise OR of letter_mask() over
 * every character of str. Repeated letters do not change the result.
 */
uint32_t word_group_id(const char * str)
{
    uint32_t group = 0;

    /* Walk up to the terminator instead of measuring the length first. */
    for (const char *p = str; *p != '\0'; ++p)
    {
        group |= letter_mask(*p);
    }

    return group;
}

现在,我们有了一种简单的方法来确定一个单词所属的组,接下来您需要创建一个简化版的映射(map),把单词按组存放进去。

这是我简单的快速实现。免责声明:未经测试。您还必须通过添加对malloc和realloc的检查来改进它。

/* One group of words that share the same group id. */
struct Word_map_bucket
{
    uint32_t id;        /* group id shared by every word in this bucket */
    char** words;       /* array of word strings; NULL while the bucket is empty */
    size_t words_size;  /* number of entries in words */
};

typedef struct Word_map_bucket Word_map_bucket;

/* Resets *bucket to an empty bucket carrying the given group id. */
void init_word_map_bucket(Word_map_bucket* bucket, uint32_t id)
{
    const Word_map_bucket empty = { .id = id, .words = NULL, .words_size = 0 };

    *bucket = empty;
}

/* Growable array of buckets, searched linearly by group id. */
struct Word_map
{
    Word_map_bucket* buckets;  /* bucket array; NULL while the map is empty */
    size_t buckets_size;       /* number of entries in buckets */
};

typedef struct Word_map Word_map;

/* Resets *map to an empty map: no bucket array and a size of zero. */
void init_word_map(Word_map* map)
{
    const Word_map empty = { .buckets = NULL, .buckets_size = 0 };

    *map = empty;
}

/*
 * Linear search for the bucket whose id equals the given id.
 * Returns a pointer into map.buckets, or NULL when no bucket matches.
 */
Word_map_bucket* find_bucket(Word_map map, uint32_t id)
{
    Word_map_bucket* cursor = map.buckets;
    size_t remaining = map.buckets_size;

    while (remaining > 0)
    {
        if (cursor->id == id)
            return cursor;
        ++cursor;
        --remaining;
    }
    return NULL;
}


/*
 * Appends an empty bucket with the given id to the map.
 *
 * Returns a pointer to the new bucket, or NULL if the bucket array could not
 * be grown; in that case the map is left unchanged. Growing the array may
 * move it, so bucket pointers obtained before this call must not be reused.
 */
Word_map_bucket* add_new_bucket(Word_map* map, uint32_t id)
{
    /* realloc takes a size in bytes, so scale the element count. */
    Word_map_bucket* grown =
        realloc(map->buckets, (map->buckets_size + 1) * sizeof *grown);
    if (grown == NULL)
        return NULL;

    map->buckets = grown;
    map->buckets_size += 1;

    /* The new slot is the last valid index, i.e. size - 1 after the increment. */
    Word_map_bucket* bucket = &map->buckets[map->buckets_size - 1];
    init_word_map_bucket(bucket, id);

    return bucket;
}

/*
 * Stores a heap-allocated copy of word in the bucket for its group id,
 * creating that bucket first when it does not exist yet.
 *
 * If any allocation fails the word is not stored. A bucket created for it
 * may remain in the map with no words.
 */
void add_word(Word_map* map, const char* word)
{
    // get to bucket
    uint32_t id = word_group_id(word);
    Word_map_bucket* bucket = find_bucket(*map, id);
    if (bucket == NULL)
        bucket = add_new_bucket(map, id);
    if (bucket == NULL)
        return;

    // copy the word, including its terminator
    size_t len = strlen(word);
    char* copy = malloc(len + 1);
    if (copy == NULL)
        return;
    memcpy(copy, word, len + 1);

    // increase bucket->words; realloc takes bytes, so scale by the element size
    char** grown =
        realloc(bucket->words, (bucket->words_size + 1) * sizeof *grown);
    if (grown == NULL)
    {
        free(copy);
        return;
    }
    bucket->words = grown;

    // push word into bucket
    bucket->words[bucket->words_size] = copy;
    bucket->words_size += 1;
}

答案 2 :(得分:-1)

  1. 使用int count[256]记录每个字符是否出现在单词中,例如,对于"abcabc",令count['a'] = 1、count['b'] = 1、count['c'] = 1
  2. 使用qsort排序,其比较函数基于count进行比较
  3. 排序后依次检查每个单词:如果它的count与前一个单词相同,就输出在同一行;否则换行开始新的一组。

以下code可以工作:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A word together with a presence table of the byte values it contains. */
struct Node {
  unsigned char *st; /* the word itself */
  int count[256];    /* indexed by byte value of a character */
};

/*
 * qsort comparator for struct Node. Compares the two count tables slot by
 * slot; at the first differing slot, the node with the larger value sorts
 * first. Returns 0 when all 256 slots are equal.
 */
int compare(const void* x, const void* y) {
  const int* lhs = ((const struct Node*)x)->count;
  const int* rhs = ((const struct Node*)y)->count;

  int slot = 0;
  while (slot != 256) {
    if (lhs[slot] != rhs[slot])
      return lhs[slot] > rhs[slot] ? -1 : 1;
    ++slot;
  }
  return 0;
}

/*
 * Groups a fixed list of words by the set of characters they contain and
 * prints every group that has at least two members, one group per line.
 *
 * The words are sorted with compare() so that words with identical count
 * tables become adjacent; each run of identical tables is then one group.
 */
int main(void) {
  unsigned char f[][10] = {"abc", "acb", "abcabc", "cab", "de", "gh", "ab", "ed", "hg"};
  int n = sizeof(f) / sizeof(f[0]);
  struct Node node[sizeof(f) / sizeof(f[0])];

  for (int i = 0; i != n; ++i) {
    node[i].st = f[i];
    memset(node[i].count, 0, sizeof(node[i].count));
    for (int j = 0; f[i][j] != '\0'; ++j) {
      unsigned char ch = f[i][j];
      node[i].count[ch] = 1;
    }
  }

  qsort(node, n, sizeof(struct Node), compare);

  /* Walk the sorted array one run at a time. Handling a whole run at once
   * guarantees the first word of every group is printed, not only the
   * first word of the first group. */
  int start = 0;
  while (start < n) {
    int end = start + 1;
    while (end < n &&
           memcmp(node[end].count, node[start].count, sizeof(node[start].count)) == 0)
      ++end;

    if (end - start > 1) { /* words with no partner are not printed */
      for (int k = start; k < end; ++k)
        printf("%s ", node[k].st);
      printf("\n");
    }
    start = end;
  }

  return 0;
}