我在 64 位 Windows 7 上使用 Visual Studio 2013。我想显示非英语语言的文本。我的代码如下:
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;
// Symbol type of the input alphabet: one narrow character per symbol.
using byte = char;
// Message to encode, stored as a NUL-terminated array of `byte`.
byte text[] = "текст";
// Number of symbols in `text`; zero until main() computes it.
int text_len;
// Node of the Huffman tree. Nodes made by createList() carry one symbol of the
// text; nodes made by createTree() join two existing subtrees.
struct Huf
{
    byte id;     // symbol stored in the node
    int wh;      // weight: occurrence count, or the sum of both children's weights
    Huf *left;   // child pointer; 0 in the nodes built by createList()
    Huf *right;  // child pointer; 0 in the nodes built by createList()
};
// Cell of the singly linked work list; each cell refers to one Huffman (sub)tree.
struct List
{
    List *next;  // following cell, or 0 at the end of the list
    Huf *tree;   // tree referenced by this cell
};
List *head;
char code[256];
void createList();
void writeList();
void delList(List *);
void addList(Huf *);
Huf *findDels();
void createTree();
void rlrootTree(Huf *, unsigned);
// Entry point: counts the symbols of `text`, prints the frequency list,
// builds the Huffman tree and prints the code assigned to every symbol.
// Always returns 0.
int main()
{
    text_len = static_cast<int>(std::strlen(text));
    createList();
    writeList();
    // An empty `text` leaves the list empty; there is then no tree to build
    // or walk, and `head` must not be dereferenced.
    if (head) createTree();
    std::cout << "writeCodes\n";
    if (head) rlrootTree(head->tree, 0);
    std::cout << std::endl;
    std::system("pause");  // hands "pause" to the system command processor
    return 0;
}
void createList()
{
int i;
int ch[256] = { 0 };
for (i = 0; i<text_len; i++) ch[text[i]]++;
List *l;
Huf *h;
head = 0;
for (i = 0; i<255; i++) if (ch[i]>0)
{
h = new Huf;
h->id = i; h->wh = ch[i];
h->left = 0; h->right = 0;
l = new List;
l->tree = h;
l->next = head; head = l;
}
}
void writeList()
{
cout << "writeList\n";
List *l = head;
while (l)
{
cout << (l->tree)->id << " ";
l = l->next;
}
cout << endl;
l = head;
while (l)
{
cout << (l->tree)->wh << " ";
l = l->next;
}
cout << endl;
}
// Unlinks cell `l` from the global list and frees the cell itself; the tree it
// refers to is left untouched. `l` must be a cell of the list.
void delList(List *l)
{
    // Walk the links (starting with `head` itself) up to the one pointing at `l`.
    List **link = &head;
    while (*link != l)
        link = &(*link)->next;

    *link = l->next;
    delete l;
}
// Pushes a new cell referring to tree `h` onto the front of the global list.
void addList(Huf *h)
{
    List *cell = new List;
    cell->next = head;
    cell->tree = h;
    head = cell;
}
// Removes from the global list the cell whose tree has the smallest weight and
// returns that tree. Among equal weights the cell nearest to the head is taken.
// The list must not be empty.
Huf *findDels()
{
    List *lightest = head;
    for (List *cur = head; cur; cur = cur->next)
    {
        if (cur->tree->wh < lightest->tree->wh)
            lightest = cur;
    }

    Huf *const result = lightest->tree;
    delList(lightest);
    return result;
}
// Collapses the global list into a single Huffman tree: repeatedly removes the
// two lightest trees and pushes a parent whose weight is their sum, until one
// cell is left. Does nothing when the list is empty.
void createTree()
{
    // `head->next` below must never be evaluated on a null `head`
    // (which is what an empty input text produces).
    if (!head) return;

    while (head->next)
    {
        Huf *h1 = findDels();
        Huf *h2 = findDels();

        Huf *h = new Huf;
        h->id = ' ';  // placeholder symbol for a joining node
        h->wh = h1->wh + h2->wh;
        h->left = h1;
        h->right = h2;
        addList(h);
    }
}
// Walks the tree under `h` and prints "<symbol>-><code> " for every node that
// has no left child. `index` is the position in the global `code` buffer that
// this call writes: '0' before descending to the right child, '1' before
// descending to the left child. A null `h` is ignored.
void rlrootTree(Huf *h, unsigned index)
{
    if (!h) return;

    code[index] = '0';
    rlrootTree(h->right, index + 1);

    if (h->left == 0)
    {
        // Terminate the path collected so far and emit it for this symbol.
        code[index] = '\0';
        std::cout << h->id << "->" << code << " ";
    }

    code[index] = '1';
    rlrootTree(h->left, index + 1);
}
我尝试过添加 <locale> 和 <windows.h>,并使用了 AnsiToOem(),但似乎无法编译——我收到了与字符有关的错误。
答案 0 :(得分:0)
您是在逐字节地遍历文本。但(例如采用 UTF-8 编码时)每个字母(字素,grapheme)由两个代码单元组成,即占两个字节。
为了简单起见,我将字符串存储为char32_t数组,因为整个代码点(一个字母)将适合数组的一个元素。
如果您希望能够处理所有语言中的每一个“字符”,还必须处理字素簇(grapheme cluster);这种情况下可以参考 ICU 库或类似的库。
一个危险简化的建议:
#include <iostream>
#include <string>
#include <cstring>
using namespace std;
// Symbol type of the input alphabet. Despite its name it is char32_t, so one
// array element holds one whole code point.
using byte = char32_t;
// Message to encode, stored as a zero-terminated array of `byte`.
byte text[] = U"текст";
// Number of symbols in `text`; zero until main() computes it.
int text_len;
// Node of the Huffman tree. Nodes made by createList() carry one symbol of the
// text; nodes made by createTree() join two existing subtrees.
struct Huf
{
    byte id;     // symbol (code point) stored in the node
    int wh;      // weight: occurrence count, or the sum of both children's weights
    Huf *left;   // child pointer; null in the nodes built by createList()
    Huf *right;  // child pointer; null in the nodes built by createList()
};
// Cell of the singly linked work list; each cell refers to one Huffman (sub)tree.
struct List
{
    List *next;  // following cell, or null at the end of the list
    Huf *tree;   // tree referenced by this cell
};
List *head;
char code[16000];
void createList();
void writeList();
void delList(List *);
void addList(Huf *);
Huf *findDels();
void createTree();
void rlrootTree(Huf *, unsigned);
// Returns the number of char32_t elements that precede the terminating zero in
// `st` (the char32_t counterpart of the C strlen). `st` must point to a
// zero-terminated array.
unsigned strlen(const char32_t* st) {
    // The parameter is const so that literals and const buffers can be
    // measured as well; the string is never modified. The counting itself is
    // delegated to the standard library.
    return static_cast<unsigned>(std::char_traits<char32_t>::length(st));
}
// Entry point: measures `text`, prints its length and the frequency list,
// builds the Huffman tree and prints the code assigned to every symbol.
// Always returns 0.
int main()
{
    // The char32_t overload of strlen returns unsigned; convert explicitly.
    text_len = static_cast<int>(strlen(text));
    std::cout << "len = " << text_len << std::endl;
    createList();
    writeList();
    // An empty `text` leaves the list empty; there is then no tree to build
    // or walk, and `head` must not be dereferenced.
    if (head) createTree();
    std::cout << "writeCodes\n";
    if (head) rlrootTree(head->tree, 0);
    std::cout << std::endl;
    system("pause");  // hands "pause" to the system command processor
    return 0;
}
void createList()
{
int i;
int ch[16000] = { 0 }; //you really need more here
for (i = 0; i<text_len; i++) ch[text[i]]++;
List *l;
Huf *h;
head = 0;
for (i = 0; i<16000; i++) if (ch[i]>0)
{
h = new Huf;
h->id = i; h->wh = ch[i];
h->left = 0; h->right = 0;
l = new List;
l->tree = h;
l->next = head; head = l;
}
}
void writeList()
{
cout << "writeList\n";
List *l = head;
while (l)
{
cout << (l->tree)->id << " ";
l = l->next;
}
cout << endl;
l = head;
while (l)
{
cout << (l->tree)->wh << " ";
l = l->next;
}
cout << endl;
}
// Unlinks cell `l` from the global list and frees the cell itself; the tree it
// refers to is left untouched. `l` must be a cell of the list.
void delList(List *l)
{
    // Walk the links (starting with `head` itself) up to the one pointing at `l`.
    List **link = &head;
    while (*link != l)
        link = &(*link)->next;

    *link = l->next;
    delete l;
}
// Pushes a new cell referring to tree `h` onto the front of the global list.
void addList(Huf *h)
{
    List *cell = new List;
    cell->next = head;
    cell->tree = h;
    head = cell;
}
// Removes from the global list the cell whose tree has the smallest weight and
// returns that tree. Among equal weights the cell nearest to the head is taken.
// The list must not be empty.
Huf *findDels()
{
    List *lightest = head;
    for (List *cur = head; cur; cur = cur->next)
    {
        if (cur->tree->wh < lightest->tree->wh)
            lightest = cur;
    }

    Huf *const result = lightest->tree;
    delList(lightest);
    return result;
}
// Collapses the global list into a single Huffman tree: repeatedly removes the
// two lightest trees and pushes a parent whose weight is their sum, until one
// cell is left. Does nothing when the list is empty.
void createTree()
{
    // `head->next` below must never be evaluated on a null `head`
    // (which is what an empty input text produces).
    if (!head) return;

    while (head->next)
    {
        Huf *h1 = findDels();
        Huf *h2 = findDels();

        Huf *h = new Huf;
        h->id = ' ';  // placeholder symbol for a joining node
        h->wh = h1->wh + h2->wh;
        h->left = h1;
        h->right = h2;
        addList(h);
    }
}
// Walks the tree under `h` and prints "<symbol>-><code> " for every node that
// has no left child; the symbol is written as its numeric code point value.
// `index` is the position in the global `code` buffer that this call writes:
// '0' before descending to the right child, '1' before descending to the left
// child. A null `h` is ignored.
void rlrootTree(Huf *h, unsigned index)
{
    if (!h) return;

    code[index] = '0';
    rlrootTree(h->right, index + 1);

    if (h->left == nullptr)
    {
        // Terminate the path collected so far and emit it for this symbol.
        code[index] = '\0';
        // A narrow stream cannot render a char32_t as a character (that
        // operator<< overload is deleted as of C++20), so request the numeric
        // value explicitly.
        std::cout << static_cast<unsigned long>(h->id) << "->" << code << " ";
    }

    code[index] = '1';
    rlrootTree(h->left, index + 1);
}