霍夫曼编码优先级队列

时间:2017-01-15 21:35:38

标签: c++ arrays algorithm heap huffman-code

我正在进行一项任务,我应该根据优先级队列为Huffman算法编写一个编码和解码应用程序。我们必须读取一个文件,计算字母的频率,然后启动算法。我有以下问题:

我的计数函数工作正常,但它会把每个字母的频率都存进数组——即使频率为零。但是,如果我想用这个数组来构建最小堆,这些零就会带来严重的问题,因此我需要找到一种“消除”它们的方法。我不能简单地跳过它们,因为那样最小堆算法就不再起作用了(相邻关系会出错)。所以我想把所有非零项放进一个 vector,然后用 vector 代替数组。但这样做时我总是得到一个错误,提示 vector 的大小有问题,我真的不知道该如何处理。(我的最小堆目前仍然使用数组,因为我甚至还没能把这些条目转移到 vector 中。)

(请忽略 main 函数,我只是在那里做一些尝试!)

using namespace std;


// One node of the Huffman tree; the same struct is also used as the element
// type of the array-backed heap.
// Every member carries a default so that a default-constructed node never
// holds indeterminate values.
struct huffman_node
{
    char data = '\0';               // symbol stored in this node
    int frequency = 0;              // occurrence count; the heap code uses -1 to mark a removed slot
    bool vector = false;            // not read or written by any code visible in this file
    huffman_node *left = nullptr;   // left child, or null when there is none
    huffman_node *right = nullptr;  // right child, or null when there is none
};

// Exchanges the complete contents of two nodes.
//
// The nodes are exchanged as whole structs rather than field by field, so
// every member (data, frequency, vector, left, right) travels with its node
// and a newly added member can never be forgotten.
//
// a, b: the nodes to exchange; passing the same node twice leaves it unchanged.
void swap_huffman_nodes(huffman_node &a, huffman_node &b)
{
    const huffman_node held = a;
    a = b;
    b = held;
}
// Prints the node's symbol directly followed by its frequency (no separator),
// then ends the line and flushes standard output.
void print_node (huffman_node a)
{
    cout << a.data;
    cout << a.frequency;
    cout << endl;
}





// Buffer that count() reads each line of the input file into.
string line;

// Letter table that doubles as the heap's storage. It is indexed from 1:
// slots 1..26 are assigned 'A'..'Z' and slots 27..52 are assigned 'a'..'z'
// by build_table(). Slot 0 holds no letter. The array needs 53 elements so
// that index 52, which every loop over the table reaches, is in bounds.
huffman_node Table[53];

// Copies of the table entries whose frequency is non-zero; filled by count().
vector <huffman_node> non_zero;

void build_table()
{   for (int i=1; i<27; i++)
    { Table[i].data = (char) (i+64);
        Table[i].left = NULL;
        Table[i].right = NULL;
    }
    for (int i=27; i<53; i++)
    { Table[i].data = (char) (i+70);
        Table[i].left = NULL;
        Table[i].right = NULL;
    }

}
int counter =0;
void count(){

    ifstream yourfile ("example.txt");
    if (yourfile.is_open())
    {
        while ( getline (yourfile,line) )
        {
            /*cout << line << '\n'; */
            unsigned long z=line.length();
            int i=0;
            while ( i < z)
            { /* cout << line[i] << endl; */

                for (int j=65; j<91; j++)
                { if ((int) line[i] == j)
                { int k=-64+j;
                    Table[k].frequency++;
                }
                }

                for (int j=97; j<123; j++)
                { if ((int) line[i] == j)
                { int k=-70+j;
                    Table[k].frequency++;
                }
                }

                i++;

            }

        }

        for (int i=1; i<53; i++)
        {   if (Table[i].frequency!=0)
        { non_zero.push_back(Table[i]);
            counter ++;
        }
        } 

        yourfile.close();
    }

    else cout << "Unable to open file";

}

// Binary min-heap ordered by huffman_node::frequency. The heap has no storage
// of its own: it operates on the global Table array using 1-based indexing,
// where the children of slot i are slots 2*i and 2*i+1. A frequency of -1
// marks a slot as removed / free. Entries with frequency 0 are ordinary
// entries and take part in the heap like any other.
class heap{
public:
    // Sifts every slot 1..52 upward in turn so the whole table satisfies the
    // min-heap property.
    void buildheap()
    {
        for (int i = 1; i <= last_slot; i++)
        {
            reheap(i);
        }
    }

    // Sift-up: moves the entry at new_index toward the root while its parent
    // has a strictly larger frequency.
    void reheap(int new_index)
    {
        int parent_index = new_index / 2;
        while (parent_index > 0 && Table[parent_index].frequency > Table[new_index].frequency)
        {
            swap_huffman_nodes(Table[parent_index], Table[new_index]);
            new_index = parent_index;
            parent_index = parent_index / 2;
        }
    }

    // Removes the root (slot 1): swaps it with the last live entry, marks
    // that last slot as removed (-1) and sifts the new root down.
    // Does nothing when the heap holds no live entry.
    void delete_root()
    {
        // The live entries occupy slots 1..non_null_entries.
        int non_null_entries = 0;
        for (int i = 1; i <= last_slot; i++)
        {
            if (Table[i].frequency != -1)
            {
                non_null_entries++;
            }
        }

        // Without this guard an empty heap would swap slot 1 with slot 0 and
        // write the -1 marker into slot 0.
        if (non_null_entries == 0)
        {
            return;
        }

        swap_huffman_nodes(Table[1], Table[non_null_entries]);
        Table[non_null_entries].frequency = -1;
        non_null_entries--;

        rebuild_heap_root_deletion(1, non_null_entries);
    }

    // Sift-down: restores the heap property below new_root, considering only
    // slots 1..non_null_entries. Recurses into the child it swapped with.
    void rebuild_heap_root_deletion(int new_root,int non_null_entries){
        const int left_child = 2 * new_root;
        const int right_child = left_child + 1;

        if (left_child > non_null_entries){
            return;   // no children inside the live range
        }

        // Pick the child with the smaller frequency (the left one on ties
        // or when there is no right child).
        int n = left_child;
        if (right_child <= non_null_entries
            && Table[right_child].frequency < Table[left_child].frequency){
            n = right_child;
        }

        if (Table[new_root].frequency > Table[n].frequency){
            swap_huffman_nodes(Table[new_root], Table[n]);
            rebuild_heap_root_deletion(n, non_null_entries);
        }
    }

    // Inserts new_heap_element into the highest-indexed free slot whose
    // predecessor is not free, then sifts it up. If no such slot exists
    // (no slot is marked -1) the element is silently dropped.
    void add_element(huffman_node new_heap_element)
    {
        for (int i = last_slot; i > 0; i--)
        {
            // For i == 1 this reads slot 0, which lies outside the heap range.
            if (Table[i].frequency == -1 && Table[i-1].frequency != -1)
            {
                Table[i] = new_heap_element;
                reheap(i);
                break;
            }
        }
    }

    // Prints "frequency , symbol" for every slot 1..52, including removed ones.
    void print_Table()
    {
        for (int i = 1; i <= last_slot; i++)
        {
            cout << Table[i].frequency << " , " << Table[i].data << endl;
        }
    }

    // A heap counts as empty here when slots 2..52 are all marked removed;
    // slot 1 is deliberately not inspected (it holds the final combined node).
    bool empty_heap()
    {
        for (int i = 2; i <= last_slot; i++)
        {
            if (Table[i].frequency != -1)
            {
                return false;
            }
        }
        return true;
    }

private:
    // Highest table index used by the heap (slots are 1..52).
    static const int last_slot = 52;
};



// Demo driver: writes a sample sentence to example.txt, counts its letters,
// builds the heap over Table and prints the resulting table.
int main(){

    ofstream myfile ("example.txt");
    if (myfile.is_open())
    {
        myfile << "Flori ist ein Koala.";
        myfile.close();
    }
    else cout << "Unable to open file";

    build_table();
    count();

    heap allan;
    cout << "\n";
    allan.buildheap();
    allan.print_Table();

    /* Disabled experiment (tree construction). It is kept commented out:
       base_1 and base_2 are copies local to one loop iteration, so the
       addresses stored in parent.left / parent.right would dangle as soon
       as the iteration ends.

    int i=0;
    while(i<500)
    {
        huffman_node base_1 = Table[1];
        allan.delete_root();
        huffman_node base_2 = Table[1];
        allan.delete_root();
        huffman_node parent;
        parent.data = '/';
        parent.frequency = base_1.frequency + base_2.frequency;
        parent.left = &base_1;
        parent.right = &base_2;
        allan.add_element(parent);
        i++;
    }
    */

    return 0;
}

0 个答案:

没有答案