我正在用霍夫曼编码(Huffman coding)实现一个可以压缩任意类型文件的压缩算法,但我发现压缩后的大小几乎与原始大小相同。例如,25 MB的视频压缩后仍占用24 MB,606 KB的图像压缩后占用60 KB。以下是我的完整代码。如果我哪里做错了,请告诉我。
/// <summary>
/// Process-wide store for the byte-to-code-string table, plus a list-splitting helper.
/// </summary>
public static class ByteValues
{
    /// <summary>Code string registered for each byte value; null until the first <see cref="AddValues"/> call.</summary>
    public static Dictionary<byte, string> ByteDictionary;

    /// <summary>
    /// Registers <paramref name="values"/> as the code string for <paramref name="b"/>,
    /// creating the dictionary on first use.
    /// </summary>
    /// <param name="b">Byte value used as the key.</param>
    /// <param name="values">Code string stored for that byte.</param>
    /// <exception cref="System.ArgumentException">The byte already has an entry.</exception>
    public static void AddValues(byte b, string values)
    {
        Dictionary<byte, string> table = ByteDictionary ?? (ByteDictionary = new Dictionary<byte, string>());
        table.Add(b, values);
    }

    /// <summary>
    /// Distributes the items of <paramref name="list"/> round-robin into sub-lists:
    /// the item at position p goes to the group keyed by p % <paramref name="parts"/>.
    /// </summary>
    /// <param name="list">Items to distribute.</param>
    /// <param name="parts">Modulus used to pick the target group.</param>
    /// <returns>The non-empty groups, in order of first appearance of their key.</returns>
    public static List<List<T>> Split<T>(this List<T> list, int parts)
    {
        return list
            .Select((element, position) => new { Element = element, Bucket = position % parts })
            .GroupBy(entry => entry.Bucket, entry => entry.Element)
            .Select(bucket => bucket.ToList())
            .ToList();
    }
}
/// <summary>
/// A node of the binary code tree: a leaf carries a byte value, an inner node has two children.
/// </summary>
public class Node
{
    /// <summary>Byte value carried by the node.</summary>
    public byte value;

    /// <summary>Frequency weight of the node.</summary>
    public long freq;

    /// <summary>Child reached by appending "0" to the path; null marks this node as a leaf.</summary>
    public Node LeftNode;

    /// <summary>Child reached by appending "1" to the path.</summary>
    public Node RightNode;

    /// <summary>
    /// Walks the subtree rooted at this node and registers, for every leaf, the path of
    /// "0"/"1" characters leading to it via <c>ByteValues.AddValues</c>.
    /// </summary>
    /// <param name="path">Path accumulated from the root down to this node.</param>
    public void Traverse(string path)
    {
        bool isLeaf = LeftNode == null;
        if (isLeaf)
        {
            ByteValues.AddValues(value, path);
            return;
        }

        // Left branch contributes a '0', right branch a '1'.
        LeftNode.Traverse(path + "0");
        RightNode.Traverse(path + "1");
    }
}
/// <summary>
/// Main window: lets the user pick a file, counts its byte frequencies on a background
/// worker, builds a Huffman code tree from the counts, and writes the encoded bit stream
/// to "Test.txt" on the desktop using a second background worker.
/// </summary>
public partial class MainWindow : Window
{
    /// <summary>Number of bytes read from the input file per iteration.</summary>
    private const int ChunkSize = 1000000;

    Dictionary<byte, long> Bytefreq = new Dictionary<byte, long>();
    string filename;
    List<Node> Nodes = new List<Node>();

    public MainWindow()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Asks the user for an input file and, if one was chosen, starts the frequency-counting pass.
    /// </summary>
    private void Button_Click_1(object sender, RoutedEventArgs e)
    {
        OpenFileDialog dialog = new OpenFileDialog();
        dialog.ShowDialog();
        filename = dialog.FileName;
        if (string.IsNullOrEmpty(filename))
        {
            return;
        }

        // Indexer assignment instead of Add: Add throws ArgumentException when the
        // button is clicked a second time because the keys already exist.
        for (int i = 0; i <= byte.MaxValue; i++)
        {
            Bytefreq[(byte)i] = 0;
        }

        BackgroundWorker worker = new BackgroundWorker();
        worker.WorkerReportsProgress = true;
        worker.DoWork += worker_DoWork;
        worker.ProgressChanged += worker_ProgressChanged;
        worker.RunWorkerCompleted += worker_RunWorkerCompleted;
        worker.RunWorkerAsync();
    }

    /// <summary>
    /// First pass: reads the input file chunk by chunk and counts how often each byte value occurs.
    /// Progress is reported as a percentage (0-100).
    /// </summary>
    void worker_DoWork(object sender, DoWorkEventArgs e)
    {
        BackgroundWorker worker = (BackgroundWorker)sender;
        long[] counts = new long[byte.MaxValue + 1];

        using (BinaryReader reader = new BinaryReader(File.OpenRead(filename)))
        {
            long length = reader.BaseStream.Length;
            // long, not int: an int position overflows on files larger than 2 GiB and the loop never ends.
            long pos = 0;
            System.Windows.Application.Current.Dispatcher.Invoke(() =>
            {
                // Progress is reported in percent, so the bar range is independent of the file size.
                pbProgress.Maximum = 100;
            });
            while (pos < length)
            {
                byte[] inputbytes = reader.ReadBytes(ChunkSize);
                if (inputbytes.Length == 0)
                {
                    // The file is shorter than the length read above; stop instead of spinning.
                    break;
                }

                // Accumulate into a fixed 256-slot table. Rebuilding the dictionary from each
                // chunk dropped every byte value that did not occur in that chunk.
                foreach (byte b in inputbytes)
                {
                    counts[b]++;
                }

                pos += inputbytes.Length;
                worker.ReportProgress((int)(pos * 100 / length));
            }
        }

        // Publish the totals; every byte value keeps an entry even when its count is zero.
        for (int i = 0; i <= byte.MaxValue; i++)
        {
            Bytefreq[(byte)i] = counts[i];
        }
    }

    /// <summary>Mirrors the reported progress value onto the progress bar.</summary>
    void worker_ProgressChanged(object sender, ProgressChangedEventArgs e)
    {
        pbProgress.Value = e.ProgressPercentage;
    }

    /// <summary>
    /// Runs after the encoding pass: tells the user the outcome and shuts the application down.
    /// </summary>
    void worker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        // An exception thrown inside DoWork is delivered through e.Error; do not claim success then.
        string message = e.Error == null ? "DONE" : "Compression failed: " + e.Error.Message;
        System.Windows.MessageBox.Show(message);
        System.Windows.Application.Current.Shutdown();
    }

    /// <summary>
    /// Runs after the counting pass: builds the Huffman tree from the byte frequencies,
    /// derives the code table, and starts the encoding pass.
    /// </summary>
    void worker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        pbProgress.Value = 0;
        if (e.Error != null)
        {
            // The frequency table is incomplete; encoding with it would produce garbage.
            System.Windows.MessageBox.Show("Reading the input file failed: " + e.Error.Message);
            return;
        }

        // Start from a clean slate so an earlier run cannot leak nodes or codes into this one.
        Nodes.Clear();
        ByteValues.ByteDictionary = null;

        foreach (KeyValuePair<byte, long> kv in Bytefreq)
        {
            Nodes.Add(new Node() { value = kv.Key, freq = kv.Value });
        }

        // Repeatedly merge the two lowest-frequency nodes until a single root remains.
        while (Nodes.Count > 1)
        {
            Nodes = Nodes.OrderBy(x => x.freq).ThenBy(x => x.value).ToList();
            Node left = Nodes[0];
            Node right = Nodes[1];
            Node newnode = new Node() { LeftNode = left, RightNode = right, freq = left.freq + right.freq };
            Nodes.RemoveRange(0, 2);
            Nodes.Add(newnode);
        }

        Nodes[0].Traverse(string.Empty);

        BackgroundWorker worker1 = new BackgroundWorker();
        worker1.WorkerReportsProgress = true;
        worker1.DoWork += worker1_DoWork;
        worker1.ProgressChanged += worker_ProgressChanged;
        worker1.RunWorkerCompleted += worker1_RunWorkerCompleted;
        worker1.RunWorkerAsync();
    }

    /// <summary>
    /// Second pass: re-reads the input file, replaces every byte with its code and writes the
    /// resulting bit stream, packed most-significant-bit first, to "Test.txt" on the desktop.
    /// </summary>
    /// <remarks>
    /// Only the packed code bits are written; neither the code table nor the original length
    /// is stored in the output file.
    /// </remarks>
    void worker1_DoWork(object sender, DoWorkEventArgs e)
    {
        BackgroundWorker worker = (BackgroundWorker)sender;
        Dictionary<byte, string> bytelookup = ByteValues.ByteDictionary;
        string outputPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Test.txt");

        using (BinaryWriter writer = new BinaryWriter(File.Create(outputPath)))
        using (BinaryReader reader = new BinaryReader(File.OpenRead(filename)))
        {
            long length = reader.BaseStream.Length;
            long pos = 0;

            // Bits that do not yet fill a whole byte are carried over to the next chunk.
            // Flushing them per chunk (left-padded by Convert.ToByte) corrupted the bit stream.
            int pendingBits = 0;
            int pendingCount = 0;

            while (pos < length)
            {
                byte[] inputbytes = reader.ReadBytes(ChunkSize);
                if (inputbytes.Length == 0)
                {
                    break;
                }

                List<byte> packed = new List<byte>(inputbytes.Length);
                foreach (byte b in inputbytes)
                {
                    foreach (char bit in bytelookup[b])
                    {
                        pendingBits = (pendingBits << 1) | (bit == '1' ? 1 : 0);
                        pendingCount++;
                        if (pendingCount == 8)
                        {
                            packed.Add((byte)pendingBits);
                            pendingBits = 0;
                            pendingCount = 0;
                        }
                    }
                }

                writer.Write(packed.ToArray());
                pos += inputbytes.Length;
                worker.ReportProgress((int)(pos * 100 / length));
            }

            if (pendingCount > 0)
            {
                // Pad the final partial byte with zero bits on the right so the code bits keep their position.
                writer.Write((byte)(pendingBits << (8 - pendingCount)));
            }
        }
    }
}
答案 0 :(得分:2)
问题在于您尝试压缩的数据的类型。当您说“例如,25 MB的视频在压缩后仍占用24 MB”时,这里的关键词是“视频”。众所周知,视频数据很难再被压缩(其他类型的二进制数据,如音乐或图像,也是如此),因为这类文件通常已经过压缩,剩余的冗余很少。
如果您需要压缩视频,我建议寻找专用的编解码器(MP4、MPEG、H.264),但其中一些可能无法免费使用,因此请留意许可费用。请注意,大多数编解码器都是有损的——它们会尽量保留可见的画质,但会丢弃视频中的其他信息。大多数情况下效果已经足够好,但在某些时候您可能会注意到压缩伪影(artifacts)。
您也可以尝试无损压缩(如Huffman、gzip、LZ、LZMA、7z,其中大多数可在7-Zip SDK中找到),但由于这类数据的性质,无损压缩无法很好地压缩它。基本思想是:数据越接近随机噪声,就越难压缩。补充一点:对于真正随机的数据,任何无损压缩算法都无法将其缩小,哪怕只是1位。