我有10行文件,我压缩到Bz2格式,但是当我解压缩它时,我看到生成的文件只有9行。数据丢失为1.5行。这是我压缩到Bz2的代码。我正在使用DotNet zip库 https://dotnetzip.codeplex.com/
以下是压缩代码,我将文件转换为UTF-8和Bz2文件。
static string Compress(string sourceFile, bool forceOverwrite)
{
var outFname = fname + ".bz2";
if (File.Exists(outFname))
{
if (forceOverwrite)
File.Delete(outFname);
else
return null;
}
long rowCount = 0;
var output = File.Create(outFname);
try
{
using (StreamReader reader = new StreamReader(fname))
{
using (var compressor = new Ionic.BZip2.ParallelBZip2OutputStream(output))
{
StreamWriter writer = new StreamWriter(compressor, System.Text.Encoding.UTF8);
string line = "";
while ((line = reader.ReadLine()) != null)
{
writer.WriteLine(line);
rowCount++;
if (rowCount % 100000 == 0)
Console.WriteLine("InProgress..Current Row # " + rowCount.ToString());
}
}
}
}
catch (Exception)
{
throw;
}
finally
{
if (output != null)
output = null;
}
// Pump(fs, compressor);
return outFname;
}
我厌倦了改变下面的阅读方法
// int charsRead;
// char[] buffer = new char[2048];
// while ((charsRead = reader.ReadBlock(buffer, 0, buffer.Length)) > 0)
// {
// writer.Write(buffer, 0, charsRead);
// rowCount++;
// if (rowCount % 100000 == 0)
// Console.WriteLine("InProgress..Current Row # " + rowCount.ToString());
// }
对于解压缩,这是代码
public static string Decompress(string fname, bool forceOverwrite)
{
var outFname = Path.GetFileNameWithoutExtension(fname);
if (File.Exists(outFname))
{
if (forceOverwrite)
File.Delete(outFname);
else
return null;
}
using (Stream fs = File.OpenRead(fname),
output = File.Create(outFname),
decompressor = new Ionic.BZip2.BZip2InputStream(fs))
Pump(decompressor, output);
return outFname;
}
private static void Pump(Stream src, Stream dest)
{
byte[] buffer = new byte[2048];
int n;
while ((n = src.Read(buffer, 0, buffer.Length)) > 0)
dest.Write(buffer, 0, n);
}
在调试过程中,我看到readline正在正确读取数据,不确定在将实际文件转换为Bz2或从Bz2读取时,此库dll中是否存在错误。请让我知道这个问题的原因
答案 0 :(得分:0)
建议你从这里获得DotNetZip:https://www.nuget.org/packages/DotNetZip/
我必须在while循环后直接将以下两行添加到Compress方法中。
writer.Close();
compressor.Close();
没有它,你的代码抛出异常。在那之后,你的代码适用于我运行它的10行文本文件。没有遗漏数据。