压缩后文件大小的霍夫曼编码问题

时间:2015-01-23 12:45:00

标签: c# huffman-code

我正在用霍夫曼编码(Huffman coding)实现一个可以压缩任意类型文件的压缩算法,但我发现压缩后的大小几乎与原始大小相同。例如,25 mb的视频压缩后仍占用24 mb,606 kb的图像压缩后占用60 kb。以下是我的全部代码。如果我哪里做错了,请告诉我。

     /// <summary>
     /// Process-wide holder for the byte-to-code table filled in by
     /// <c>Node.Traverse</c>, plus a small list-partitioning helper.
     /// </summary>
     public static class ByteValues
     {
         /// <summary>
         /// Maps a byte value to the code string registered for it.
         /// Stays <c>null</c> until the first call to <see cref="AddValues"/>.
         /// </summary>
         public static Dictionary<byte, string> ByteDictionary;

         /// <summary>
         /// Registers <paramref name="values"/> as the code for byte <paramref name="b"/>,
         /// creating the table on first use.
         /// </summary>
         /// <param name="b">Byte value the code belongs to.</param>
         /// <param name="values">Code string to store for that byte.</param>
         public static void AddValues(byte b, string values)
         {
             if (ByteDictionary == null)
             {
                 ByteDictionary = new Dictionary<byte, string>();
             }

             // Indexer assignment rather than Add: the table is static and is never
             // cleared here, so building a second code table in the same process
             // used to throw ArgumentException on the first repeated byte.
             ByteDictionary[b] = values;
         }

         /// <summary>
         /// Distributes the items of <paramref name="list"/> round-robin into at most
         /// <paramref name="parts"/> lists: the item at index <c>i</c> goes to list
         /// <c>i % parts</c>. Lists that would be empty are not returned.
         /// </summary>
         /// <typeparam name="T">Element type.</typeparam>
         /// <param name="list">Items to distribute.</param>
         /// <param name="parts">Number of target lists; must be positive.</param>
         /// <returns>The non-empty partitions, in order of their first item.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="parts"/> is zero or negative.</exception>
         public static List<List<T>> Split<T>(this List<T> list, int parts)
         {
             if (list == null)
             {
                 throw new ArgumentNullException("list");
             }

             if (parts <= 0)
             {
                 throw new ArgumentOutOfRangeException("parts", "parts must be greater than zero.");
             }

             // Use the index supplied by Select instead of a captured counter that is
             // mutated while the deferred query is being enumerated.
             return list
                 .Select((item, index) => new { Item = item, Bucket = index % parts })
                 .GroupBy(x => x.Bucket, x => x.Item)
                 .Select(bucket => bucket.ToList())
                 .ToList();
         }
     }


    /// <summary>
    /// A node of the Huffman tree. A node without children is a leaf carrying a
    /// byte value; any other node only links to its children.
    /// </summary>
    public class Node
    {
        /// <summary>Byte value of a leaf; left at its default on internal nodes.</summary>
        public byte value;

        /// <summary>Weight of the node (for an internal node, the sum of its children).</summary>
        public long freq;

        /// <summary>Child reached by appending "0" to the path.</summary>
        public Node LeftNode;

        /// <summary>Child reached by appending "1" to the path.</summary>
        public Node RightNode;

        /// <summary>
        /// Walks the subtree rooted at this node and registers, through
        /// <c>ByteValues.AddValues</c>, the path of every leaf as that leaf's code.
        /// </summary>
        /// <param name="path">Code prefix accumulated so far; empty for the root.</param>
        public void Traverse(string path)
        {
            if (LeftNode == null && RightNode == null)
            {
                // A root that is itself a leaf (only one distinct symbol) would get
                // an empty code and every symbol would encode to zero bits, so give
                // it a one-bit code instead.
                ByteValues.AddValues(value, string.IsNullOrEmpty(path) ? "0" : path);
                return;
            }

            // Guard each side separately so a half-built node cannot cause a
            // NullReferenceException.
            if (LeftNode != null)
            {
                LeftNode.Traverse(path + "0");
            }

            if (RightNode != null)
            {
                RightNode.Traverse(path + "1");
            }
        }
    }

   /// <summary>
   /// Main window of the Huffman compressor. A button click picks a file, a first
   /// background pass counts byte frequencies, the code table is built on the UI
   /// thread, and a second background pass writes the encoded file to
   /// <c>Test.txt</c> on the desktop.
   /// </summary>
   /// <remarks>
   /// Output layout (integers as written by <see cref="BinaryWriter"/>, i.e. little-endian):
   /// an Int32 count of distinct byte values, then for each of them in ascending
   /// order one byte (the value) and one Int64 (its frequency), then the code bits
   /// packed most-significant-bit first. The last byte is padded with zero bits.
   /// The number of encoded symbols equals the sum of the stored frequencies, and
   /// a decoder rebuilds the tree by repeating the procedure in
   /// <see cref="worker_RunWorkerCompleted"/>.
   /// </remarks>
   public partial class MainWindow : Window
   {
       // Number of bytes requested from the input file per read in both passes.
       const int ChunkSize = 1000000;

       Dictionary<byte, long> Bytefreq = new Dictionary<byte, long>();
       string filename;
       List<Node> Nodes = new List<Node>();

       // True from the moment a file has been picked until the run fails or finishes.
       bool isBusy;

       public MainWindow()
       {
           InitializeComponent();
       }

       /// <summary>Lets the user pick a file and starts the frequency-counting pass.</summary>
       private void Button_Click_1(object sender, RoutedEventArgs e)
       {
           // A second click while workers are running would corrupt the shared
           // fields (the original crashed on Bytefreq.Add), so ignore it.
           if (isBusy)
           {
               return;
           }

           OpenFileDialog dialog = new OpenFileDialog();
           dialog.ShowDialog();
           filename = dialog.FileName;
           if (string.IsNullOrEmpty(filename))
           {
               return;
           }

           isBusy = true;
           Bytefreq.Clear();
           Nodes.Clear();

           // Progress is reported as a percentage so that files larger than
           // int.MaxValue bytes cannot overflow the int passed to ReportProgress.
           pbProgress.Maximum = 100;
           pbProgress.Value = 0;

           BackgroundWorker worker = new BackgroundWorker();
           worker.WorkerReportsProgress = true;
           worker.DoWork += worker_DoWork;
           worker.ProgressChanged += worker_ProgressChanged;
           worker.RunWorkerCompleted += worker_RunWorkerCompleted;
           worker.RunWorkerAsync();
       }

       /// <summary>First pass: counts how often each byte value occurs in the file.</summary>
       void worker_DoWork(object sender, DoWorkEventArgs e)
       {
           BackgroundWorker worker = (BackgroundWorker)sender;

           // Accumulate into a local array. The previous LINQ version replaced
           // Bytefreq on every chunk with a dictionary holding only that chunk's
           // keys, which dropped counts and threw KeyNotFoundException as soon as
           // a byte first appeared in a later chunk.
           long[] counts = new long[byte.MaxValue + 1];

           using (FileStream input = File.OpenRead(filename))
           {
               long length = input.Length;
               long pos = 0;
               byte[] buffer = new byte[ChunkSize];
               int read;

               // Stop on a zero-length read so a file that shrinks while being
               // read cannot cause an endless loop.
               while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
               {
                   for (int i = 0; i < read; i++)
                   {
                       counts[buffer[i]]++;
                   }

                   pos += read;
                   worker.ReportProgress(ToPercent(pos, length));
               }
           }

           for (int i = 0; i < counts.Length; i++)
           {
               Bytefreq[(byte)i] = counts[i];
           }
       }

       /// <summary>Shows the progress reported by either worker.</summary>
       void worker_ProgressChanged(object sender, ProgressChangedEventArgs e)
       {
           pbProgress.Value = e.ProgressPercentage;
       }

       /// <summary>Runs after the encoding pass: reports the outcome and closes the application on success.</summary>
       void worker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
       {
           if (e.Error != null)
           {
               isBusy = false;
               pbProgress.Value = 0;
               System.Windows.MessageBox.Show(e.Error.Message);
               return;
           }

           System.Windows.MessageBox.Show("DONE");
           System.Windows.Application.Current.Shutdown();
       }

       /// <summary>
       /// Runs after the counting pass: builds the Huffman tree and code table,
       /// then starts the encoding pass.
       /// </summary>
       void worker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
       {
           pbProgress.Value = 0;

           // An exception thrown inside DoWork arrives here; do not build a tree
           // from partial counts.
           if (e.Error != null)
           {
               isBusy = false;
               System.Windows.MessageBox.Show(e.Error.Message);
               return;
           }

           // Only bytes that actually occur get a leaf. Zero-frequency leaves
           // would still be merged into the tree and lengthen real codes.
           Nodes.Clear();
           foreach (KeyValuePair<byte, long> kv in Bytefreq)
           {
               if (kv.Value > 0)
               {
                   Nodes.Add(new Node() { value = kv.Key, freq = kv.Value });
               }
           }

           // Start from an empty table so codes from an earlier run cannot leak in.
           ByteValues.ByteDictionary = new Dictionary<byte, string>();

           if (Nodes.Count == 1)
           {
               // A single distinct byte has no tree to walk; give it a one-bit code.
               ByteValues.AddValues(Nodes[0].value, "0");
           }
           else if (Nodes.Count > 1)
           {
               while (Nodes.Count > 1)
               {
                   // OrderBy is a stable sort, so ties are resolved the same way
                   // on every run; a decoder repeating these steps gets the same tree.
                   Nodes = Nodes.OrderBy(x => x.freq).ThenBy(x => x.value).ToList();
                   Node left = Nodes[0];
                   Node right = Nodes[1];

                   Node newnode = new Node() { LeftNode = left, RightNode = right, freq = left.freq + right.freq };
                   Nodes.RemoveRange(0, 2);
                   Nodes.Add(newnode);
               }

               Nodes[0].Traverse(string.Empty);
           }

           BackgroundWorker worker1 = new BackgroundWorker();
           worker1.WorkerReportsProgress = true;
           worker1.DoWork += worker1_DoWork;
           worker1.ProgressChanged += worker_ProgressChanged;
           worker1.RunWorkerCompleted += worker1_RunWorkerCompleted;
           worker1.RunWorkerAsync();
       }

       /// <summary>Second pass: writes the header and the bit-packed codes of every input byte.</summary>
       void worker1_DoWork(object sender, DoWorkEventArgs e)
       {
           BackgroundWorker worker = (BackgroundWorker)sender;

           // Flatten the table into an array for cheap per-byte lookup.
           string[] codes = new string[byte.MaxValue + 1];
           foreach (KeyValuePair<byte, string> entry in ByteValues.ByteDictionary)
           {
               codes[entry.Key] = entry.Value;
           }

           string target = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Test.txt");

           // Open the input first: if the user picked the target file itself,
           // File.Create then fails instead of truncating the data to compress.
           using (FileStream input = File.OpenRead(filename))
           using (BinaryWriter writer = new BinaryWriter(File.Create(target)))
           {
               WriteHeader(writer);

               long length = input.Length;
               long pos = 0;
               byte[] buffer = new byte[ChunkSize];
               int read;

               // Bits not yet forming a full byte. They are carried across chunk
               // boundaries; flushing a short group per chunk (as before) shifted
               // every following bit and made the stream undecodable.
               int pending = 0;
               int pendingBits = 0;

               while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
               {
                   for (int i = 0; i < read; i++)
                   {
                       string code = codes[buffer[i]];
                       if (code == null)
                       {
                           throw new InvalidOperationException("The input file changed after its byte frequencies were counted.");
                       }

                       foreach (char bit in code)
                       {
                           pending = (pending << 1) | (bit == '1' ? 1 : 0);
                           pendingBits++;
                           if (pendingBits == 8)
                           {
                               writer.Write((byte)pending);
                               pending = 0;
                               pendingBits = 0;
                           }
                       }
                   }

                   pos += read;
                   worker.ReportProgress(ToPercent(pos, length));
               }

               // Left-align the final partial byte; the low-order bits are padding.
               if (pendingBits > 0)
               {
                   writer.Write((byte)(pending << (8 - pendingBits)));
               }
           }
       }

       // Writes the count of distinct byte values followed by (value, frequency) pairs in ascending value order.
       void WriteHeader(BinaryWriter writer)
       {
           List<KeyValuePair<byte, long>> used = Bytefreq
               .Where(kv => kv.Value > 0)
               .OrderBy(kv => kv.Key)
               .ToList();

           writer.Write(used.Count);
           foreach (KeyValuePair<byte, long> kv in used)
           {
               writer.Write(kv.Key);
               writer.Write(kv.Value);
           }
       }

       // Converts a byte position into a 0-100 percentage, clamped in case the file grew while being read.
       static int ToPercent(long done, long total)
       {
           if (total <= 0)
           {
               return 100;
           }

           return (int)Math.Min(100L, done * 100L / total);
       }
   }

1 个答案:

答案 0 :(得分:2)

问题在于您尝试压缩的数据类型。当您说“例如,25 mb的视频在压缩后仍占用24 mb”时,这里的关键词是“视频”。众所周知,视频数据很难再压缩(音乐、图像等其他二进制媒体数据也是如此),因为这类文件通常已经用专门的编解码器压缩过,剩余的冗余很少。

如果您需要压缩视频,我建议寻找专用的编解码器(MP4、MPEG、H.264),但其中有些并非免费使用,因此请留意许可费用。请注意,大多数编解码器都是有损的——它们会尽量保留可见的画面质量,但会丢弃视频中的其他信息。其中大多数的效果都足够好,但在某些情况下您可能会注意到压缩伪影(artifacts)。

您也可以尝试无损压缩(如Huffman、gzip、LZ、LZMA、7z,其中大部分可以在7-Zip SDK中找到),但由于这类数据本身的性质,无损压缩的效果不会很好。基本原理是:数据越接近随机噪声,就越难压缩。补充一点:任何无损压缩算法都无法真正压缩随机数据,哪怕只减少1位(可在此处阅读相关内容)。