我需要使用.net套接字加载任何网页,这样我就可以控制自己如何连接和接收数据。
注意
更新
也可以运行一个C# HTTP代理服务器,并把WebClient的代理地址指向它,这样在继续使用WebClient的同时仍能自行控制连接和数据。 #Mentalis
答案 0 :(得分:1)
RFC中对分块传输编码(chunked transfer encoding)有非常清楚的描述。每个块的格式如下:
chunk-size[;chunk-extensions]<CRLF>
chunk-data<CRLF>
首先发送的是块大小,它是一个十六进制数,指定了接下来块数据的字节数;其后可以带有可选的块扩展,并以CRLF(即\r\n)结尾。
读取完指定数量的字节之后,后面还会跟着一个CRLF,因此您必须再多读取两个字节。
然后就可以开始读取下一个块了。如果chunk-size为0,则还需要再读取两个CRLF(不要把它们加入缓冲区),此时您就已经收到了全部数据,因为大小为0的块表示最后一个块。
请注意,您不能使用ReadLine()来读取块数据,因为响应正文(即chunk-data)中的换行符会被当作行结束符,所以它可能在整个块读完之前就提前返回。
答案 1 :(得分:0)
下面演示如何使用C#加载任意网页(包括采用分块传输编码的网页),该实现适用于.NET中所有可用类型的流。
此代码基于RFC 2616第3.6节。
/// <summary>
/// Read-only stream over an HTTP response body that is read from an inner stream.
/// When <see cref="Chunked"/> is set, the body is decoded from chunked transfer
/// encoding (hex chunk-size line, chunk data, CRLF, ..., zero-size chunk, trailers);
/// otherwise reads are passed straight through to the inner stream.
/// </summary>
public class HttpStream : Stream
{
    internal readonly Stream InnerStream;
    private long length;
    private bool canRead;

    /// <summary>Whether the body must be decoded as chunked transfer encoding.</summary>
    internal bool Chunked { get; set; }

    /// <summary>Size of the chunk currently being read, or -1 when the next chunk-size line has not been read yet.</summary>
    internal int ChunkLength { get; set; }

    /// <summary>Number of bytes of the current chunk that have already been handed to the caller.</summary>
    internal int ChunkReceivedPosition { get; set; }

    /// <summary>Wraps <paramref name="innerStream"/>; the stream starts readable with no chunk size read yet.</summary>
    internal HttpStream(Stream innerStream)
    {
        InnerStream = innerStream;
        ChunkLength = -1;
        canRead = true;
    }

    /// <summary>
    /// Reads body bytes into <paramref name="buffer"/>. In chunked mode the call keeps
    /// reading until <paramref name="count"/> bytes were stored or the terminating
    /// zero-size chunk was reached, so chunk framing never reaches the caller.
    /// </summary>
    /// <param name="buffer">Destination buffer.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start storing bytes.</param>
    /// <param name="count">Maximum number of bytes to read.</param>
    /// <returns>The number of bytes stored; 0 once the end of the body has been reached.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The range described by offset and count does not fit in the buffer.</exception>
    /// <exception cref="EndOfStreamException">The inner stream ended in the middle of a chunked body.</exception>
    /// <exception cref="IOException">A chunk-size line is empty or describes a negative size.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
            throw new ArgumentNullException("buffer");
        if (offset < 0)
            throw new ArgumentOutOfRangeException("offset");
        if (count < 0)
            throw new ArgumentOutOfRangeException("count");
        if (buffer.Length - offset < count)
            throw new ArgumentException("offset and count exceed the length of the buffer.");

        // Stream.Read reports end of stream with 0; a negative value breaks
        // standard consumers such as CopyTo and StreamReader.
        if (!canRead)
            return 0;

        if (!Chunked)
        {
            var countRead = InnerStream.Read(buffer, offset, count);
            Position += countRead;
            return countRead;
        }

        var bytesReadInCallSession = 0;
        while (bytesReadInCallSession < count)
        {
            // read next chunked content size
            if (ChunkLength == -1)
                ChunkLength = ReadChunkLength();

            // end of HTTP response-body
            if (ChunkLength == 0)
            {
                canRead = false;
                SkipTrailers();
                break;
            }

            // never read past the current chunk or past what the caller asked for
            var toRead = Math.Min(ChunkLength - ChunkReceivedPosition, count - bytesReadInCallSession);
            var bytesRead = InnerStream.Read(buffer, offset + bytesReadInCallSession, toRead);

            // a zero-byte read means the connection closed mid-chunk; without this
            // check the loop would spin forever
            if (bytesRead <= 0)
                throw new EndOfStreamException("The inner stream ended before the current chunk was completely read.");

            ChunkReceivedPosition += bytesRead;
            bytesReadInCallSession += bytesRead;
            Position += bytesRead;

            if (ChunkReceivedPosition == ChunkLength)
            {
                // force to read next chunk size in next loop
                ChunkLength = -1;
                ChunkReceivedPosition = 0;
                // discard the CR LF that terminates the chunk data
                InnerStream.ReadLine();
            }
        }

        return bytesReadInCallSession;
    }

    /// <summary>
    /// Reads one chunk-size line from the inner stream and returns the size it declares.
    /// Anything from the first ';' on (chunk extensions) is ignored.
    /// </summary>
    private int ReadChunkLength()
    {
        // Read the line including its terminator and trim it here: an empty
        // result then unambiguously means the inner stream has ended.
        string sizeLine = InnerStream.ReadLine();
        if (String.IsNullOrEmpty(sizeLine))
            throw new EndOfStreamException("The inner stream ended where a chunk size was expected.");

        int extensionStart = sizeLine.IndexOf(';');
        if (extensionStart >= 0)
            sizeLine = sizeLine.Substring(0, extensionStart);

        sizeLine = sizeLine.Trim();
        if (sizeLine.Length == 0)
            throw new IOException("The chunk size line is empty.");

        // FormatException / OverflowException from Convert propagate for non-hex input
        int chunkLength = Convert.ToInt32(sizeLine, 16);

        // base-16 conversion maps values such as FFFFFFFF to negative numbers,
        // and -1 would collide with the "size not read yet" marker
        if (chunkLength < 0)
            throw new IOException("The chunk size is out of range.");

        return chunkLength;
    }

    /// <summary>Consumes trailer lines after the zero-size chunk up to and including the blank line (or end of stream).</summary>
    private void SkipTrailers()
    {
        while (true)
        {
            string trailer = InnerStream.ReadLine();
            if (String.IsNullOrWhiteSpace(trailer))
                break;
            // TODO: process trailers
        }
    }

    /// <summary>Stores the value that <see cref="Length"/> reports; the stream itself is not resized.</summary>
    public override void SetLength(long value)
    {
        length = value;
    }

    /// <summary>True until the terminating zero-size chunk of a chunked body has been read.</summary>
    public override bool CanRead
    {
        get { return canRead; }
    }

    /// <summary>The value last passed to <see cref="SetLength"/> (0 if it was never called).</summary>
    public override long Length
    {
        get { return length; }
    }

    /// <summary>Number of body bytes returned by <see cref="Read"/> so far, unless overwritten through the setter.</summary>
    public override long Position
    {
        get;
        set;
    }

    /// <summary>No-op: this stream never buffers written data.</summary>
    public override void Flush()
    {
    }

    /// <summary>Not supported; always throws <see cref="NotSupportedException"/>.</summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    /// <summary>Not supported; always throws <see cref="NotSupportedException"/>.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }

    /// <summary>Always false: seeking is not supported.</summary>
    public override bool CanSeek
    {
        get { return false; }
    }

    /// <summary>Always false: writing is not supported.</summary>
    public override bool CanWrite
    {
        get { return false; }
    }
}
我们还需要一个增强版的ReadLine,以Stream类扩展方法的形式实现:
/// <summary>
/// Extension methods for <see cref="Stream"/>.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Reads bytes one at a time from <paramref name="stream"/> until the line
    /// terminator has been read or the stream ends, and returns them as a string
    /// in which every byte is converted to the char with the same numeric value.
    /// </summary>
    /// <param name="stream">Stream to read from; it is left positioned just after the terminator.</param>
    /// <param name="returnLineEndBytes">
    /// When true the terminator is kept at the end of the result; when false it is
    /// removed. If the stream ended before a terminator was found, nothing is removed.
    /// </param>
    /// <param name="lineEndBytes">Terminator byte sequence; defaults to CR LF (13, 10) when null.</param>
    /// <returns>The line read, or an empty string when the stream was already at its end.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="lineEndBytes"/> is empty.</exception>
    public static string ReadLine(this Stream stream, bool returnLineEndBytes = true, byte[] lineEndBytes = null)
    {
        if (stream == null)
            throw new ArgumentNullException("stream");

        // default end line bytes
        if (lineEndBytes == null)
            lineEndBytes = new byte[2] { 13, 10 };
        if (lineEndBytes.Length == 0)
            throw new ArgumentException("The line terminator must contain at least one byte.", "lineEndBytes");

        var stringBuilder = new StringBuilder();
        var terminatorFound = false;
        do
        {
            var byteRead = stream.ReadByte();
            // end of stream break loop
            if (byteRead == -1)
                break;

            stringBuilder.Append((char)byteRead);
            // the text built so far doubles as the sliding window over the last bytes
            terminatorFound = EndsWith(stringBuilder, lineEndBytes);
        } while (!terminatorFound);

        // Strip only a terminator that was actually read; on a premature end of
        // stream the tail is data and must not be cut off.
        if (terminatorFound && !returnLineEndBytes)
            stringBuilder.Length -= lineEndBytes.Length;

        return stringBuilder.ToString();
    }

    /// <summary>Returns true when the last chars of <paramref name="text"/> equal the bytes of <paramref name="suffix"/>.</summary>
    private static bool EndsWith(StringBuilder text, byte[] suffix)
    {
        if (text.Length < suffix.Length)
            return false;

        var start = text.Length - suffix.Length;
        for (var i = 0; i < suffix.Length; i++)
        {
            if (text[start + i] != (char)suffix[i])
                return false;
        }
        return true;
    }
}