我在Trie中插入了10k字符串,现在我需要找到1并与其他字符串进行比较,然后再对每个字符串进行比较。因此,此搜索应该很快,我不认为我的功能是最好的解决方案。
#include <stdio.h>
#include <locale.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "trie.h"
#include "lcs.h"
#define WORD_SIZE 64
#define CHAR_SIZE 256
#define FILE_NAME "file.txt"
char words[WORD_SIZE] = {0};
char defWord[WORD_SIZE] = {0};
int firstWord = 0;
int j = 0;
void searchStrings(struct Trie *head) {
struct Trie *curr = head;
int i, k;
for (i = 0; i < CHAR_SIZE - 1; i++) {
if (curr->character[i] != NULL && !curr[i].isProcessed) {
if (curr[i].isLeaf) {
if (!firstWord) {
for (k = 0; k < WORD_SIZE; k++) {
defWord[k] = words[k];
if (words[k] == '\0') break;
}
firstWord = 1;
continue;
}
curr[i].isProcessed = 1;
//compareStrings(words); TODO: complete that function
} else {
words[j] = i;
searchStrings(curr);
}
}
}
}
void parseData(FILE *text) {
int c = 0, wordIter = 0;
char word[WORD_SIZE] = {0};
struct Trie *head = getNewTrieNode();
if (!head) {
printf("Error! Structure is not created.");
return;
}
setlocale(LC_ALL, "");
while (c != EOF) {
c = getc(text);
if (!isalpha(c)) {
insert(&head, word);
memset(word, 0, strlen(word));
wordIter = 0;
continue;
}
word[wordIter++] = c;
}
searchStrings(head);
free(head);
}
int loadFile() {
FILE *text;
text = fopen(FILE_NAME, "r");
if (!text) {
printf("Error! Cannot open file.");
return EXIT_FAILURE;
}
parseData(text);
fclose(text);
return EXIT_SUCCESS;
}
这是特里的文件。
#include <stdio.h>
#include <stdlib.h>
#include "trie.h"
#define CHAR_SIZE 256
struct Trie {
int isLeaf; // 1 when node is a leaf node
int isProcessed; // 1 when leaf node is processed
struct Trie *character[CHAR_SIZE];
};
// Function that returns a new Trie node
struct Trie *getNewTrieNode() {
int i;
struct Trie *node = (struct Trie *) malloc(sizeof(struct Trie));
node->isLeaf = 0;
node->isProcessed = 0;
for (i = 0; i < CHAR_SIZE; i++)
node->character[i] = NULL;
return node;
}
// Iterative function to insert a string in Trie.
void insert(struct Trie **head, char *str) {
// start from root node
struct Trie *curr = *head;
int numb = 0;
while (*str) {
numb = *str - 'A';
if (numb < 0) { // for negative numbers
numb += CHAR_SIZE;
}
// create a new node if path doesn't exists
if (curr->character[numb] == NULL)
curr->character[numb] = getNewTrieNode();
// go to next node
curr = curr->character[numb];
// move to next character
str++;
}
// mark current node as leaf
curr->isLeaf = 1;
}
主文件
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"
int main(void) {
loadFile();
return 0;
}
我正在寻找参数isLeaf。如果为参数1,则表示字符串结尾。因此,当我找到第一个未处理的字符时,可以将其添加到全局数组中。当我找到第一个Leaf char时,我可以保存它并将其发送给下一个函数。但是元音有问题。我可以在长度> 3时添加if语句。我可以简化它吗?还是有更好的算法?有256个字符大小,因为我需要捷克字母的所有字符(包括大写字母),并且在CP1250 ascii中有些字符为128+。