C ++ word unscrambler

时间:2015-04-28 23:33:06

标签: c++ permutation word

我对C ++很新,作为练习,我正在尝试写一个“单词解码器”。也就是说,我有一个大文本文件,里面装满了一些文字。每个trie_node都有一个27个trie_nodes的数组,默认情况下为NULL,除非该元素与字母表中可以跟随trie_node表示的字母的字母共享相同的位置。 27元素表示该单词可以在该节点结束。

我有这个课程,我希望通过所有字母组合进行排列,但不打扰通过不可能的字母组合。

我写的几乎有我需要的东西。但是,它仅适用于非常特定的字母组合。

例如,如果您输入字母“last”,则会得到以下字词:

last
salt
slat

但是,如果你输入“salt”这个词(“last”的排列),你只能得到这个:

salt

我很确定问题出在我的permute()方法中。找到这些单词的最有效方法是什么,而不是遍历所有排列并将其与单词列表进行比较(这将是一个昂贵的n!操作)?

#pragma once

#include <map>
#include <string>
#include <fstream>

#include "trie.h"

using std::ifstream;
using std::string;
using std::map;

class Words
{
private:
    trie_node* WordEnd; // end marker
    trie_node wordbank;
    map<string, short> _found;

    template <typename E>
    static void swap(E* i, E* j) {
        E k = *i;
        *i = *j;
        *j = k;
    }

    void permute(char* word, const trie_node* node, size_t pos, size_t len) {
        if (is_word(word, len)) {
            string str_word(word, len);
            _found[str_word] = 0;
        }
        if (pos < len - 1) {
            size_t pos2;
            for (pos2 = pos; pos2 < len; ++pos2) {
                char* j = word + pos2;
                const trie_node* _next = next(node, *j);
                if (_next) { // check if that's a valid path
                    char* i = word + pos;
                    swap(i, j); // swap letters
                    permute(word, _next, pos, len); // find that route
                    swap(i, j); // switch back
                }
            }
        }
    }

public:
    Words()
        : wordbank(27) {
        WordEnd = new trie_node(1);
    }

    Words(const Words& other)
        : wordbank(27) {
        operator=(other);
    }

    ~Words() {
        delete WordEnd;
    }

    Words& operator=(const Words& other) {
        if (this != &other) {
            WordEnd = new trie_node(*WordEnd);
            wordbank = other.wordbank;
            _found = other._found;
        }
        return *this;
    }

    void clear() {
        _found.clear();
    }

    void permute(char* word, size_t len) {
        permute(word, &wordbank, 0, len);
    }

    size_t size() const {
        return _found.size();
    }

    size_t found(string buff[], size_t len) const {
        if (len > _found.size()) {
            len = _found.size();
        }
        size_t index = 0;
        for (map<string, short>::const_iterator it = _found.begin(), e = _found.end(); it != e; ++it) {
            buff[index] = it->first;
            if (++index == len) {
                break;
            }
        }
        return len;
    }

    const trie_node* next(char c) const {
        return next(&wordbank, c);
    }

    static const trie_node* next(const trie_node* n, char c) {
        if (isalpha(c)) {
            size_t pos = tolower(c) - 'a';
            return n->operator[](pos);
        }
        return NULL;
    }

    bool is_word(const char* word, size_t len) const {
        const trie_node* node = &wordbank;
        for (size_t i = 0; i < len; ++i) {
            if (isalpha(word[i])) {
                size_t index = tolower(word[i]) - 'a';
                const trie_node* next = node->operator[](index);
                if (!next) {
                    return false;
                }
                node = next;
            }
        }
        return node->operator[](26) == WordEnd;
    }

    bool load(const string& path) {
        ifstream wordfile;
        wordfile.open(path);
        if (!wordfile.is_open()) {
            return false;
        }
        trie_node* node = &wordbank;
        string word;
        while (getline(wordfile, word)) {
            size_t i = 0;
            for (; i < word.size(); ++i) {
                size_t index = word[i] - 'a';
                trie_node* _next = (*node)[index];
                if (!_next) {
                    _next = node->branch(index);
                }
                node = _next;
                if (i == word.size() - 1) {
                    _next->set(26, WordEnd);
                }
            }
        }
        wordfile.close();
        return true;
     }
};

2 个答案:

答案 0 :(得分:2)

IIUC你试图在字典中找到一个单词的所有字谜。最好的方法如下:

1. Create map from string to list of strings.
2. For each word in dictionary.
  a. Let sortedWord = sort letters in word lexicographically.
  b. Add word to the list in the map whose key is sortedWord
3. Let searchWord be the word whose anagrams you are looking for.
4. Let sortedSearchWord = sort letters in searchWord lexicographically.
5. Return map[sortedSearchWord]

假设字典中最长的单词有k个字母且有n个单词,则此算法在O(n*k*log(k))中运行以构建地图,然后在O(k*log(k))中运行以查找给定单词的字谜。

答案 1 :(得分:0)

Thanks for your suggestions. I streamlined the whole thing with this:

#include <iostream>
#include <string>
#include <algorithm>
#include <fstream>

using namespace std;

inline void sort(string& str) {
    sort(str.begin(), str.end());
}

void findwords(string& letters, istream& in, ostream& out) {
    sort(letters);
    string word;
    while (getline(in, word)) {
        string token(word);
        sort(token);
        if (token == letters) {
            out << word << endl;
        }
    }
}

int main(int argc, char* argv[]) {
    if (argc != 2) {
        cout << "usage: scramble <word>" << endl;
        return 1;
    }
    ifstream wordsfile;
    wordsfile.open("words.txt");
    if (!wordsfile.is_open()) {
        cout << "unable to load words.txt" << endl;
        return 2;
    }
    string words(argv[1]);
    findwords(words, wordsfile, cout);
    wordsfile.close();
    return 0;
}

This pretty much solves everything. However, I may want to add functionality to find all possible words in a string, not just anagrams, but that's another project.