我实现了一个解决这个问题的递归函数:
我的函数的想法是对每个可能方向的每个字母进行递归调用,然后连接它将要形成的字符串。当此字符串的长度为 n 时,它将停止。为了防止返回到已经使用的字母,我创建了一个指向字母类的指针数组,它将每个可能的新字母与所有使用的字母进行比较。 为此,我为每个字母创建了一个特殊的类。
这是功能:(抱歉,这很长)
dirSu表示Up - dirGiu Down - dirDx right - dirSx left - 以及所有其他对角线位置
void decisione(lettera *start, lettera *next, int count, string seq, vector <lettera*> visitate, int quanti) {
if (next == nullptr) return ;
if (count == quanti) {
bool test = false;
for (int k = 0; k < start->sequenze.size(); k++) { //To prevent same letter to be used
if (start->sequenze[k] == seq) {
test = true;
break;
}
}
if (!test) start->sequenze.push_back(seq);
return ;
}
seq = seq + next->chiave; //String concatenating every call
count++; //Counter to decide when stop
visitate.push_back(next); //Array of used letters
int i;
bool test;
//Decide direction
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirSu) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirSu, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirSu) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirSu, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirGiu) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirGiu, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirSx) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirSx, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirDx) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirDx, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirAdx) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirAdx, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirAsx) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirAsx, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirBdx) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirBdx, count, seq, visitate, quanti);
test = false;
for(i = 0; i < visitate.size(); i++) {
if (visitate[i] == next->dirBsx) {
test = true;
break;
}
}
if (!test) decisione(start, next->dirBsx, count, seq, visitate, quanti);
}
好吧,当我激活3个字母的单词功能时,它真的很快,4个字母:矩阵中每个字母的计算时间0.028秒,5个字母:0.225s,6个字母:1.891s,7个字母: 14.914s。
有没有办法通过优化我的功能来减少计算时间? (或删除我的愚蠢逻辑错误?) 非常感谢!
我用于测试的矩阵是:
C A S A
C O S A
B A C I
O T A T
我在每次速度测试中使用它(不按频率计算或随机计算)和这个(我的问题后我的递归函数有所改进)我在大约90秒内得到所有单词,每个单词有5,6,7个字母起始信。有很多单词,虽然它只是一个4x4矩阵! 如果我可以私下发送给您,那么也可以看到您对同一问题的解决方案。 (我不知道如何在这里与您联系)
答案 0 :(得分:2)
将算法从递归更改为迭代(使用循环)。它通常更快。之后,其他优化与算法和数据结构相关,以降低复杂性。找到这样的优化要困难得多(但可能)。
编辑:关于代码的一些想法,然后是我今天所做的字搜索版本:
所以,这是代码。它是独立的,您只需编译它,使用英语(或意大利语或法语)单词文件(文本,每行一个单词)执行:
#include <string>
#include <iostream>
#include <stdlib.h>
#include <vector>
#include <time.h>
#include <map>
#include <algorithm>
// **************************************************** //
// //
// Data structure for the words : dictionnary //
// //
// **************************************************** //
struct DicNode
{
const char l;
bool isEndOfWord;
DicNode* children[26]; // a - z, case insensitive
DicNode* parent;
unsigned int min, max, level;
DicNode(char c, DicNode* p = 0, bool end = false) : l(c),isEndOfWord(end),parent(p),min((unsigned int)(-1)),max(0), level(parent ? parent->level + 1 : 0)
{
for(unsigned int t = 0; t < 26; t++)
children[t] = 0;
}
~DicNode()
{
for(unsigned int t = 0; t < 26; t++)
if(children[t])
delete children[t];
}
DicNode*& at(char l)
{
if(l == 'à' || l == 'ä' || l == 'â')
l = 'a';
if(l == 'é' || l == 'è' || l == 'ê' || l == 'ë')
l = 'e';
if(l == 'ï' || l == 'î')
l = 'i';
if(l == 'ö' || l == 'ô')
l = 'o';
if(l == 'ü' || l == 'û' || l == 'ù')
l = 'u';
if(l == 'ç')
l = 'c';
if(l <= 'Z')
l = 'a' + l - 'A';
// Note: undefined behavior if l > 'Z' or l < 'A'.
return children[(unsigned int)(l - 'a')];
}
bool inRange(unsigned int n) const
{
return n <= max && n >= min;
}
void print(std::string str = "") const
{
// Here recursive algorithm is way better because of the object-oriented structure
if(isEndOfWord)
std::cout << (str + l) << "\n";
for(unsigned int t = 0; t < 26; t++)
if(children[t])
children[t]->print(str + l);
}
std::string toString() const
{
std::string str(level, '0');
const DicNode* node = this;
while(node && node->level > 0)
{
str[node->level - 1] = node->l;
node = node->parent;
}
return str;
}
};
void addWord(DicNode* root, const std::string& str)
{
if(!root || str == "") return;
DicNode* node = root;
unsigned int i = 0;
while(i != str.size())
{
if(str.size() - i > node->max) node->max = str.size() - i;
if(str.size() - i < node->min) node->min = str.size() - i;
char c = tolower(str[i]);
DicNode*& n = node->at(c);
if(!n) n = new DicNode(c,node);
node = n;
i++;
}
node->isEndOfWord = true;
}
// **************************************************** //
// //
// Data structures for the algorithm //
// //
// **************************************************** //
enum Direction
{
INVALID,
NORTH,
NORTH_EAST,
EAST,
SOUTH_EAST,
SOUTH,
SOUTH_WEST,
WEST,
NORTH_WEST,
FINISHED
};
Direction incDir(Direction d)
{
switch(d)
{
case NORTH: return NORTH_EAST;
case NORTH_EAST: return EAST;
case EAST: return SOUTH_EAST;
case SOUTH_EAST: return SOUTH;
case SOUTH: return SOUTH_WEST;
case SOUTH_WEST: return WEST;
case WEST: return NORTH_WEST;
case NORTH_WEST: return NORTH;
default: return INVALID;
}
}
Direction operator-(Direction d)
{
switch(d)
{
case NORTH: return SOUTH;
case NORTH_EAST: return SOUTH_WEST;
case EAST: return WEST;
case SOUTH_EAST: return NORTH_WEST;
case SOUTH: return NORTH;
case SOUTH_WEST: return NORTH_EAST;
case WEST: return EAST;
case NORTH_WEST: return SOUTH_EAST;
default: return INVALID;
}
}
std::string toString(Direction d)
{
switch(d)
{
case NORTH: return "NORTH";
case NORTH_EAST: return "NORTH_EAST";
case EAST: return "EAST";
case SOUTH_EAST: return "SOUTH_EAST";
case SOUTH: return "SOUTH";
case SOUTH_WEST: return "SOUTH_WEST";
case WEST: return "WEST";
case NORTH_WEST: return "NORTH_WEST";
default: return "INVALID";
}
}
struct Cell
{
char l;
DicNode* node;
Direction dir, dirParent;
Cell(char l_ = 'A') : l(l_),node(0),dir(INVALID),dirParent(INVALID) {}
};
struct ProbLetter
{
char letter;
float proba;
};
class Map
{
public:
Map(unsigned int w, unsigned int h) : width(w), height(h)
{
data = new Cell[width * height];
for(unsigned int t = 0; t < width * height; t++)
data[t] = randomEnglishLetter();
}
static char randomEnglishLetter()
{
// Frequency of english letters
static ProbLetter probas[26] =
{
{ 'Z', 0.074f },
{ 'Q', 0.095f },
{ 'X', 0.150f },
{ 'J', 0.153f },
{ 'K', 0.772f },
{ 'V', 0.978f },
{ 'B', 1.492f },
{ 'P', 1.929f },
{ 'Y', 1.974f },
{ 'G', 2.015f },
{ 'F', 2.228f },
{ 'W', 2.360f },
{ 'M', 2.406f },
{ 'U', 2.758f },
{ 'C', 2.782f },
{ 'L', 4.025f },
{ 'D', 4.253f },
{ 'R', 5.987f },
{ 'H', 6.094f },
{ 'S', 6.327f },
{ 'N', 6.749f },
{ 'I', 6.966f },
{ 'O', 7.507f },
{ 'A', 8.167f },
{ 'T', 9.056f },
{ 'E', 12.702f }
};
// Random number between 0 and 1
float p = 100.0f * float(rand() % 10000) / 9999.0f;
float sum = 0.0f;
for(unsigned int t = 0; t < 26; t++)
{
sum += probas[t].proba;
if(p < sum) return probas[t].letter;
}
return probas[25].letter;
}
Cell& operator()(unsigned int x, unsigned int y)
{
return data[x + y * width];
}
bool inRange(int x, int y)
{
return x >= 0 && x < (int)width && y >= 0 && y < (int)height;
}
void print()
{
for(unsigned int y = 0; y < height; y++)
{
for(unsigned int x = 0; x < width; x++)
std::cout << " " << data[x + y * width].l;
std::cout << "\n";
}
}
unsigned int getWidth() const { return width; }
unsigned int getHeight() const { return height; }
private:
unsigned int width, height;
Cell* data;
};
// **************************************************** //
// //
// Word-search algorithm //
// //
// **************************************************** //
inline void advance(int& x, int& y, Direction d)
{
switch(d)
{
case NORTH:
y--;
return;
case NORTH_EAST:
x++;
y--;
return;
case EAST:
x++;
return;
case SOUTH_EAST:
x++;
y++;
return;
case SOUTH:
y++;
return;
case SOUTH_WEST:
x--;
y++;
return;
case WEST:
x--;
return;
case NORTH_WEST:
x--;
y--;
return;
default:
return;
}
}
struct Data
{
Map& map;
int x;
int y;
};
bool descend(Data& dat, unsigned int n)
{
DicNode* parent = dat.map(dat.x, dat.y).node;
if(n == parent->level) return false;
Direction dir = dat.map(dat.x, dat.y).dir;
if(dir == FINISHED) return false;
else if(dir == INVALID) dir = NORTH;
int pX = dat.x, pY = dat.y;
bool firstIteration = true;
for(; firstIteration || dir != NORTH; dir = incDir(dir))
{
firstIteration = false;
pX = dat.x;
pY = dat.y;
advance(pX, pY, dir);
if(dat.map.inRange(pX, pY) // (pX, pY) is not outside the map
&& dat.map(pX, pY).node == 0 // The cell is not already used
&& parent->at(dat.map(pX, pY).l) != 0) // An entry in the dictionary exists
{
// We found the next node
dat.map(dat.x, dat.y).dir = dir;
dat.map(pX, pY).node = parent->at(dat.map(pX, pY).l);
dat.map(pX, pY).dirParent = -dir;
dat.x = pX;
dat.y = pY;
return true;
}
}
return false;
}
bool ascend(Data& dat)
{
// Go back on the previous node
Direction dp = dat.map(dat.x, dat.y).dirParent;
if(dp == INVALID) return false;
dat.map(dat.x, dat.y).dir = INVALID;
dat.map(dat.x, dat.y).dirParent = INVALID;
dat.map(dat.x, dat.y).node = 0;
advance(dat.x, dat.y, dp);
dat.map(dat.x, dat.y).dir = incDir(dat.map(dat.x, dat.y).dir);
if(dat.map(dat.x, dat.y).dir == NORTH)
dat.map(dat.x, dat.y).dir = FINISHED;
return true;
}
void findWordsFromPosition(Map& map, DicNode* parent, unsigned int x, unsigned int y, unsigned int n, std::vector<std::string>& output)
{
if(!parent) return;
bool ok = true;
// Setup first node
map(x, y).node = parent;
map(x, y).dir = NORTH;
// while we can find next node with direction
// If marked node has right order and is end of word, or if condition on n is not verified:
// add it to the output (if condition on n is verified)
// no need to go further (because of n), so advance direction of parent cell, reset current cell, and go up for the current position
Data dat = { map, x, y };
while(ok)
{
if(map(dat.x,dat.y).node->toString() == "ane")
std::cout << "ok\n";
if(!descend(dat, n))
ok = ascend(dat);
else
{
DicNode* node = dat.map(dat.x, dat.y).node;
if(node->level == n && node->isEndOfWord)
{
std::string str = node->toString();
if(std::find(output.begin(), output.end(), str) == output.end())
output.push_back(str);
ok = ascend(dat);
}
else if(!node->inRange(n - node->level))
ok = ascend(dat);
}
}
// Clean first node
map(x, y).dir = INVALID;
map(x, y).dirParent = INVALID;
map(x, y).node = 0;
}
void getWords(Map& map, DicNode& dic, unsigned int n, std::vector<std::string>& output)
{
if(n > dic.max || n < dic.min) return;
// Search words for every position
for(unsigned int y = 0; y < map.getHeight(); y++)
for(unsigned int x = 0; x < map.getWidth(); x++)
findWordsFromPosition(map,dic.at(map(x,y).l),x,y,n,output);
}
#include <fstream>
int main()
{
srand((unsigned int)time(0));
// Prepare the words, for example, load them from a real dictionary
DicNode dictionary(0);
unsigned int maxSize = 0;
// Note: the following code make sense only if you load words from a file
{
std::ifstream dico("english.txt");
std::string str;
int count = 0;
while(dico >> str)
{
if(str.size() > maxSize) maxSize = str.size();
addWord(&dictionary,str);
count++;
}
std::cout << "Read " << count << " words from dictionary, longest with " << maxSize << " letters.\n";
}
// Prepare the matrix
Map mat(4,4);
/*
mat(0,0) = 'A';
mat(1,0) = 'N';
mat(2,0) = 'F';
mat(3,0) = 'N';
mat(0,1) = 'S';
mat(1,1) = 'L';
mat(2,1) = 'E';
mat(3,1) = 'R';
mat(0,2) = 'G';
mat(1,2) = 'O';
mat(2,2) = 'R';
mat(3,2) = 'R';
mat(0,3) = 'S';
mat(1,3) = 'I';
mat(2,3) = 'U';
mat(3,3) = 'I';
*/
std::cout << "\nMatrix:\n";
mat.print();
// Search words
std::vector<std::string> words;
for(unsigned int t = 5; t <= 7; t++)
getWords(mat,dictionary,t,words);
std::cout << "\nWords:\n";
if(words.size() == 0)
std::cout << " (no words)\n";
else
for(unsigned int t = 0; t < words.size(); t++)
std::cout << " " << words[t] << "\n";
}
注意:前面的代码是代码搜索解析器。它现在已从答案中删除,但您仍然可以通过搜索此答案的编辑修订版来获取它。
<强> EDIT2 强>: 一些口头解释
字典。 这是一种radix-tree,但每个节点只有一个字母要存储。它最多可以有26个孩子(如果你包括口音和/或数字则更多),每个字母对应一个字母。基本布局如下:
Word tree http://img571.imageshack.us/img571/2281/wtree.png
示例:
An example of a dictionary http://img706.imageshack.us/img706/1244/wordsr.png
每个节点还包含一个布尔值,表示它是否是单词的结尾。 另外,每个节点存储子子节点的最小和最大大小,其父节点以及它们从根节点的级别(=前缀的大小)。当我们看到没有子分支可以给出具有特定长度的单词时,它允许停止搜索单词。
矩阵。 矩阵是一个双字母数组。但是,为了提高效率,我添加了一些数据:字典中的相应节点(参见算法),子节点的方向(参见算法)及其父节点(参见算法)。
算法。 想法是通过'降序'(增加代表当前找到的字符串的路径)在地图中'圈'。当无法下降时,我们必须“提升”:
FIND WORD:
----------
while(we can continue)
try descend
if(failed to descend)
ascend
else
node = node of current cell
if node is end of word
add it to the found words, then ascend
else if node is out of range (ie, n = 5, but with node you can only have words with 3 characters)
ascend
DESCEND:
--------
we're from cell (x,y), with node D
for each direction (from cell.direction to north-west, clockwise)
advance position (x,y) with direction
if the position is valid
and we haven't already visited the cell for this word
and the node got with D.children[cell.letter] exists
set the current position of the algorithm to the result of the advance
set the direction of the old cell to the direction used to advance
set the direction of the new cell to the opposite (pointing to the old cell)
set the node of the new cell by the found one
return
ASCEND:
-------
we're at (x,y)
cell.node = 0 (the cell is not considered as visited anymore)
advance the parent direction by one clockwise (if this direction is north-west, we use a special direction that say that we cannot rotate anymore: FINISHED)
cell.dir = INVALID
advance (x,y) following cell.direction_to_parent
cell.direction_to_parent = INVALID
set the current position to the result of advance
图像说明:
Algorithm unrolled http://img822.imageshack.us/img822/1374/algow.png
步骤(在losanges中显示的数字):
答案 1 :(得分:2)
你正在处理大量的组合,因为你不是在寻找真正的单词。如果您是,您将能够查找字符组合,并查看某个单词是否仍然可以使用特定语言。抬头需要更多时间,但许多组合将被淘汰 但最后,您愿意花多少时间让应用程序更快一点?
话虽如此:优化可能有可能因为你按值传递一个向量和一个字符串,它们都会产生内存分配(当然对于vector,可能是字符串)。传递std::array<lettera*,wordlength>
代替应该更快,并且也会给你这个词。
答案 2 :(得分:2)
首先,您正在寻找的解决方案的数量随着长度呈指数增长,因此任何解决方案都将是指数级的;所有你希望做的就是减少常数因素。
但是,我建议保持已经找到的字符串排序,并对每个新字符串进行二进制搜索。对于7的长度,一旦重复排除,您将发现超过60000。 (在快速实现中,我自己敲了一下,这产生了10倍以上的差异,差异越大,我所寻找的字符串就越长。)同样,用一个表示是否被访问的标记来注释每个单元格。不是,而不是不得不以某种方式查找它,也应该节省很多时间。