霍夫曼编码大文件的问题?

时间:2014-04-15 11:51:22

标签: c++ text-files binaryfiles huffman-code

我的程序适用于包含最多3-4行的文本文件..但是当有更多行或字符时,编码有问题。请给我一些解决方案。 这是我的计划:

#include <stdio.h>
#include <string.h>
#include<fstream>
#include<string>
#include<iostream>

using namespace std;
typedef struct node_t {
    struct node_t *left, *right;
    int freq;
    char c;
} *node;

struct node_t pool[256] = {{0}};
node qqq[255], *q = qqq - 1;
int n_nodes = 0, qend = 1;
char *code[128] = {0}, buf[1024];

node new_node(int freq, char c, node a, node b)
{
    node n = pool + n_nodes++;
    if (freq) n->c = c, n->freq = freq;
    else {
        n->left = a, n->right = b;
        n->freq = a->freq + b->freq;
    }
    return n;
}

/* priority queue */
void qinsert(node n)
{
    int j, i = qend++;
    while ((j = i / 2)) {
        if (q[j]->freq <= n->freq) break;
        q[i] = q[j], i = j;
    }
    q[i] = n;
}

node qremove()
{
    int i, l;
    node n = q[i = 1];

    if (qend < 2) return 0;
    qend--;
    while ((l = i * 2) < qend) {
        if (l + 1 < qend && q[l + 1]->freq < q[l]->freq) l++;
        q[i] = q[l], i = l;
    }
    q[i] = q[qend];
    return n;
}

/* walk the tree and put 0s and 1s */
void build_code(node n, char *s, int len)
{
    static char *out = buf;
    if (n->c) {
        s[len] = 0;
        strcpy(out, s);
        code[n->c] = out;
        out += len + 1;
        return;
    }

    s[len] = '0'; build_code(n->left,  s, len + 1);
    s[len] = '1'; build_code(n->right, s, len + 1);
}

void init(const char *s)
{
    int i, freq[128] = {0};
    char c[16];

    while (*s) freq[(int)*s++]++;

    for (i = 0; i < 128; i++)
        if (freq[i]) qinsert(new_node(freq[i], i, 0, 0));

    while (qend > 2)
        qinsert(new_node(0, 0, qremove(), qremove()));

    build_code(q[1], c, 0);
}

void encode(const char *s, char *out)
{
    while (*s) {
        strcpy(out, code[*s]);
        out += strlen(code[*s++]);
    }
}

void decode(const char *s, node t)
{
    node n = t;
    while (*s) {
        if (*s++ == '0') n = n->left;
        else n = n->right;

        if (n->c) putchar(n->c), n = t;
    }

    putchar('\n');putchar('\n');
    if (t != n) printf("garbage input\n");
}

int main(void)
{
    int i;

    string filename;
    ifstream myfile;
    cout<<"Please Enter File Name that you want to Encode: ";
    getline(cin,filename);
    filename += ".txt";

    myfile.open(filename.c_str());

    std::string str;
    str.assign( (std::istreambuf_iterator<char>(myfile) ),
               (std::istreambuf_iterator<char>()    ) );
    std::cout<<"\n"<<str<<endl<<endl;

    const char * c = str.c_str();
    char buf[1024];

    init(c);
    for (i = 0; i < 128; i++)
        if (code[i]) printf("'%c': %s\n", i, code[i]);

    encode(c, buf);
    ofstream binfile("binary.txt");
    if(binfile.is_open())
    {
        binfile<<buf;
        binfile.close();
    }

    printf("\nEncoded:\n%s\n", buf);

    printf("\n\nDecoded:\n");
    ifstream dfile("binary.txt");

    while(getline(dfile,str))
        cout<<str<<endl<<endl;
    const char * y = str.c_str();
    decode(y, q[1]);

    return 0;
}

此外,有人可以建议我如何在二进制文件中编写输出,因为我正在压缩它。在此先感谢:)

0 个答案:

没有答案