我是.net的新手。我在C#中做压缩和解压缩字符串。有一个XML,我正在转换为字符串,之后我正在进行压缩和解压缩。我的代码中没有编译错误,除非我解压缩我的代码并返回我的字符串,它只返回XML的一半。
以下是我的代码,请在我错的地方纠正我。
代码:
class Program
{
public static string Zip(string value)
{
//Transform string into byte[]
byte[] byteArray = new byte[value.Length];
int indexBA = 0;
foreach (char item in value.ToCharArray())
{
byteArray[indexBA++] = (byte)item;
}
//Prepare for compress
System.IO.MemoryStream ms = new System.IO.MemoryStream();
System.IO.Compression.GZipStream sw = new System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Compress);
//Compress
sw.Write(byteArray, 0, byteArray.Length);
//Close, DO NOT FLUSH cause bytes will go missing...
sw.Close();
//Transform byte[] zip data to string
byteArray = ms.ToArray();
System.Text.StringBuilder sB = new System.Text.StringBuilder(byteArray.Length);
foreach (byte item in byteArray)
{
sB.Append((char)item);
}
ms.Close();
sw.Dispose();
ms.Dispose();
return sB.ToString();
}
public static string UnZip(string value)
{
//Transform string into byte[]
byte[] byteArray = new byte[value.Length];
int indexBA = 0;
foreach (char item in value.ToCharArray())
{
byteArray[indexBA++] = (byte)item;
}
//Prepare for decompress
System.IO.MemoryStream ms = new System.IO.MemoryStream(byteArray);
System.IO.Compression.GZipStream sr = new System.IO.Compression.GZipStream(ms,
System.IO.Compression.CompressionMode.Decompress);
//Reset variable to collect uncompressed result
byteArray = new byte[byteArray.Length];
//Decompress
int rByte = sr.Read(byteArray, 0, byteArray.Length);
//Transform byte[] unzip data to string
System.Text.StringBuilder sB = new System.Text.StringBuilder(rByte);
//Read the number of bytes GZipStream red and do not a for each bytes in
//resultByteArray;
for (int i = 0; i < rByte; i++)
{
sB.Append((char)byteArray[i]);
}
sr.Close();
ms.Close();
sr.Dispose();
ms.Dispose();
return sB.ToString();
}
static void Main(string[] args)
{
XDocument doc = XDocument.Load(@"D:\RSP.xml");
string val = doc.ToString(SaveOptions.DisableFormatting);
val = Zip(val);
val = UnZip(val);
}
}
我的XML大小为63KB。
答案 0 :(得分:233)
压缩/解压缩字符串的代码
public static void CopyTo(Stream src, Stream dest) {
byte[] bytes = new byte[4096];
int cnt;
while ((cnt = src.Read(bytes, 0, bytes.Length)) != 0) {
dest.Write(bytes, 0, cnt);
}
}
public static byte[] Zip(string str) {
var bytes = Encoding.UTF8.GetBytes(str);
using (var msi = new MemoryStream(bytes))
using (var mso = new MemoryStream()) {
using (var gs = new GZipStream(mso, CompressionMode.Compress)) {
//msi.CopyTo(gs);
CopyTo(msi, gs);
}
return mso.ToArray();
}
}
public static string Unzip(byte[] bytes) {
using (var msi = new MemoryStream(bytes))
using (var mso = new MemoryStream()) {
using (var gs = new GZipStream(msi, CompressionMode.Decompress)) {
//gs.CopyTo(mso);
CopyTo(gs, mso);
}
return Encoding.UTF8.GetString(mso.ToArray());
}
}
static void Main(string[] args) {
byte[] r1 = Zip("StringStringStringStringStringStringStringStringStringStringStringStringStringString");
string r2 = Unzip(r1);
}
请记住,Zip
会返回byte[]
,而Unzip
会返回string
。如果你想要一个来自Zip
的字符串,你可以对它进行Base64编码(例如使用Convert.ToBase64String(r1)
)(Zip
的结果是非常二进制的!这不是你可以打印到的屏幕或直接在XML中写入)
建议的版本适用于.NET 2.0,因为.NET 4.0使用MemoryStream.CopyTo
。
重要事项:压缩内容无法写入输出流,直到GZipStream
知道它具有所有输入(即,有效压缩它需要所有数据) 。在检查输出流之前,您需要确保Dispose()
的{{1}}(例如GZipStream
)。这是通过上面的mso.ToArray()
块完成的。请注意,using() { }
是最里面的块,内容在其外部访问。在尝试访问数据之前解压缩GZipStream
的{{1}}也是如此。
答案 1 :(得分:77)
根据 this snippet 我使用此代码,它工作正常:
using System;
using System.IO;
using System.IO.Compression;
using System.Text;
namespace CompressString
{
internal static class StringCompressor
{
/// <summary>
/// Compresses the string.
/// </summary>
/// <param name="text">The text.</param>
/// <returns></returns>
public static string CompressString(string text)
{
byte[] buffer = Encoding.UTF8.GetBytes(text);
var memoryStream = new MemoryStream();
using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
{
gZipStream.Write(buffer, 0, buffer.Length);
}
memoryStream.Position = 0;
var compressedData = new byte[memoryStream.Length];
memoryStream.Read(compressedData, 0, compressedData.Length);
var gZipBuffer = new byte[compressedData.Length + 4];
Buffer.BlockCopy(compressedData, 0, gZipBuffer, 4, compressedData.Length);
Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gZipBuffer, 0, 4);
return Convert.ToBase64String(gZipBuffer);
}
/// <summary>
/// Decompresses the string.
/// </summary>
/// <param name="compressedText">The compressed text.</param>
/// <returns></returns>
public static string DecompressString(string compressedText)
{
byte[] gZipBuffer = Convert.FromBase64String(compressedText);
using (var memoryStream = new MemoryStream())
{
int dataLength = BitConverter.ToInt32(gZipBuffer, 0);
memoryStream.Write(gZipBuffer, 4, gZipBuffer.Length - 4);
var buffer = new byte[dataLength];
memoryStream.Position = 0;
using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
{
gZipStream.Read(buffer, 0, buffer.Length);
}
return Encoding.UTF8.GetString(buffer);
}
}
}
}
答案 2 :(得分:30)
随着.NET 4.0(及更高版本)与Stream.CopyTo()方法的出现,我想我会发布一个更新的方法。
我还认为以下版本作为用于将常规字符串压缩为Base64编码字符串的自包含类的明显示例非常有用,反之亦然:
public static class StringCompression
{
/// <summary>
/// Compresses a string and returns a deflate compressed, Base64 encoded string.
/// </summary>
/// <param name="uncompressedString">String to compress</param>
public static string Compress(string uncompressedString)
{
byte[] compressedBytes;
using (var uncompressedStream = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
{
using (var compressedStream = new MemoryStream())
{
// setting the leaveOpen parameter to true to ensure that compressedStream will not be closed when compressorStream is disposed
// this allows compressorStream to close and flush its buffers to compressedStream and guarantees that compressedStream.ToArray() can be called afterward
// although MSDN documentation states that ToArray() can be called on a closed MemoryStream, I don't want to rely on that very odd behavior should it ever change
using (var compressorStream = new DeflateStream(compressedStream, CompressionLevel.Fastest, true))
{
uncompressedStream.CopyTo(compressorStream);
}
// call compressedStream.ToArray() after the enclosing DeflateStream has closed and flushed its buffer to compressedStream
compressedBytes = compressedStream.ToArray();
}
}
return Convert.ToBase64String(compressedBytes);
}
/// <summary>
/// Decompresses a deflate compressed, Base64 encoded string and returns an uncompressed string.
/// </summary>
/// <param name="compressedString">String to decompress.</param>
public static string Decompress(string compressedString)
{
byte[] decompressedBytes;
var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));
using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
{
using (var decompressedStream = new MemoryStream())
{
decompressorStream.CopyTo(decompressedStream);
decompressedBytes = decompressedStream.ToArray();
}
}
return Encoding.UTF8.GetString(decompressedBytes);
}
这是使用扩展方法技术扩展String类以添加字符串压缩和解压缩的另一种方法。您可以将下面的课程放到现有项目中,然后使用:
var uncompressedString = "Hello World!";
var compressedString = uncompressedString.Compress();
和
var decompressedString = compressedString.Decompress();
即便:
public static class Extensions
{
/// <summary>
/// Compresses a string and returns a deflate compressed, Base64 encoded string.
/// </summary>
/// <param name="uncompressedString">String to compress</param>
public static string Compress(this string uncompressedString)
{
byte[] compressedBytes;
using (var uncompressedStream = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
{
using (var compressedStream = new MemoryStream())
{
// setting the leaveOpen parameter to true to ensure that compressedStream will not be closed when compressorStream is disposed
// this allows compressorStream to close and flush its buffers to compressedStream and guarantees that compressedStream.ToArray() can be called afterward
// although MSDN documentation states that ToArray() can be called on a closed MemoryStream, I don't want to rely on that very odd behavior should it ever change
using (var compressorStream = new DeflateStream(compressedStream, CompressionLevel.Fastest, true))
{
uncompressedStream.CopyTo(compressorStream);
}
// call compressedStream.ToArray() after the enclosing DeflateStream has closed and flushed its buffer to compressedStream
compressedBytes = compressedStream.ToArray();
}
}
return Convert.ToBase64String(compressedBytes);
}
/// <summary>
/// Decompresses a deflate compressed, Base64 encoded string and returns an uncompressed string.
/// </summary>
/// <param name="compressedString">String to decompress.</param>
public static string Decompress(this string compressedString)
{
byte[] decompressedBytes;
var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));
using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
{
using (var decompressedStream = new MemoryStream())
{
decompressorStream.CopyTo(decompressedStream);
decompressedBytes = decompressedStream.ToArray();
}
}
return Encoding.UTF8.GetString(decompressedBytes);
}
答案 3 :(得分:6)
对于那些仍然 GZip标头中的幻数不正确的人。确保传入GZip流。错误 如果您的字符串是使用 php 压缩的,那么您需要执行以下操作:
public static string decodeDecompress(string originalReceivedSrc) {
byte[] bytes = Convert.FromBase64String(originalReceivedSrc);
using (var mem = new MemoryStream()) {
//the trick is here
mem.Write(new byte[] { 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 }, 0, 8);
mem.Write(bytes, 0, bytes.Length);
mem.Position = 0;
using (var gzip = new GZipStream(mem, CompressionMode.Decompress))
using (var reader = new StreamReader(gzip)) {
return reader.ReadToEnd();
}
}
}
答案 4 :(得分:3)
这是使用async / await和IEnumerables的.NET 4.5及更新版本的更新版本:
public static class CompressionExtensions
{
public static async Task<IEnumerable<byte>> Zip(this object obj)
{
byte[] bytes = obj.Serialize();
using (MemoryStream msi = new MemoryStream(bytes))
using (MemoryStream mso = new MemoryStream())
{
using (var gs = new GZipStream(mso, CompressionMode.Compress))
await msi.CopyToAsync(gs);
return mso.ToArray().AsEnumerable();
}
}
public static async Task<object> Unzip(this byte[] bytes)
{
using (MemoryStream msi = new MemoryStream(bytes))
using (MemoryStream mso = new MemoryStream())
{
using (var gs = new GZipStream(msi, CompressionMode.Decompress))
{
//gs.CopyTo(mso);
await gs.CopyToAsync(mso);
}
return mso.ToArray().Deserialize();
}
}
}
public static class SerializerExtensions
{
/// <summary>
/// Writes the given object instance to a binary file.
/// <para>Object type (and all child types) must be decorated with the [Serializable] attribute.</para>
/// <para>To prevent a variable from being serialized, decorate it with the [NonSerialized] attribute; cannot be applied to properties.</para>
/// </summary>
/// <typeparam name="T">The type of object being written to the XML file.</typeparam>
/// <param name="filePath">The file path to write the object instance to.</param>
/// <param name="objectToWrite">The object instance to write to the XML file.</param>
/// <param name="append">If false the file will be overwritten if it already exists. If true the contents will be appended to the file.</param>
public static byte[] Serialize<T>(this T objectToWrite)
{
using (MemoryStream stream = new MemoryStream())
{
BinaryFormatter binaryFormatter = new BinaryFormatter();
binaryFormatter.Serialize(stream, objectToWrite);
return stream.GetBuffer();
}
}
/// <summary>
/// Reads an object instance from a binary file.
/// </summary>
/// <typeparam name="T">The type of object to read from the XML.</typeparam>
/// <param name="filePath">The file path to read the object instance from.</param>
/// <returns>Returns a new instance of the object read from the binary file.</returns>
public static async Task<T> _Deserialize<T>(this byte[] arr)
{
using (MemoryStream stream = new MemoryStream())
{
BinaryFormatter binaryFormatter = new BinaryFormatter();
await stream.WriteAsync(arr, 0, arr.Length);
stream.Position = 0;
return (T)binaryFormatter.Deserialize(stream);
}
}
public static async Task<object> Deserialize(this byte[] arr)
{
object obj = await arr._Deserialize<object>();
return obj;
}
}
通过这个,您可以序列化BinaryFormatter支持的所有内容,而不仅仅是字符串。
答案 5 :(得分:2)
我最喜欢@fubo的答案,但我认为这要优雅得多。
此方法更加兼容,因为它不会手动预先存储长度。
我还公开了扩展,以支持对字符串到字符串,字节[]到字节[]以及流到流的压缩。
public static class ZipExtensions
{
public static string CompressToBase64(this string data)
{
return Convert.ToBase64String(Encoding.UTF8.GetBytes(data).Compress());
}
public static string DecompressFromBase64(this string data)
{
return Encoding.UTF8.GetString(Convert.FromBase64String(data).Decompress());
}
public static byte[] Compress(this byte[] data)
{
using (var sourceStream = new MemoryStream(data))
using (var destinationStream = new MemoryStream())
{
sourceStream.CompressTo(destinationStream);
return destinationStream.ToArray();
}
}
public static byte[] Decompress(this byte[] data)
{
using (var sourceStream = new MemoryStream(data))
using (var destinationStream = new MemoryStream())
{
sourceStream.DecompressTo(destinationStream);
return destinationStream.ToArray();
}
}
public static void CompressTo(this Stream stream, Stream outputStream)
{
using (var gZipStream = new GZipStream(outputStream, CompressionMode.Compress))
{
stream.CopyTo(gZipStream);
gZipStream.Flush();
}
}
public static void DecompressTo(this Stream stream, Stream outputStream)
{
using (var gZipStream = new GZipStream(stream, CompressionMode.Decompress))
{
gZipStream.CopyTo(outputStream);
}
}
}
答案 6 :(得分:0)
我们可以通过使用StreamReader和StreamWriter来减少代码复杂性,而不是手动将字符串转换为字节数组。您只需要三个流:
public static byte[] Zip(string uncompressed)
{
byte[] ret;
using (var outputMemory = new MemoryStream())
{
using (var gz = new GZipStream(outputMemory, CompressionLevel.Optimal))
{
using (var sw = new StreamWriter(gz, Encoding.UTF8))
{
sw.Write(uncompressed);
}
}
ret = outputMemory.ToArray();
}
return ret;
}
public static string Unzip(byte[] compressed)
{
string ret = null;
using (var inputMemory = new MemoryStream(compressed))
{
using (var gz = new GZipStream(inputMemory, CompressionMode.Decompress))
{
using (var sr = new StreamReader(gz, Encoding.UTF8))
{
ret = sr.ReadToEnd();
}
}
}
return ret;
}