当我连接到某些网站时,它会给我:
Content-Type: text/html; charset=ISO-8859-1
Connection: close
Transfer-Encoding: chunked
Date: Tue, 01 Jan 2013 18:49:53 GMT
fff8
在文件的末尾,它看起来像:
</script><!-- vBadvanced 1-3-9-4-8-0 -->
</body>
</html
1
>
0
但是,当我请求 stackoverflow.com 时,输出的格式完全正常。源代码中可能会多出一些空白行,但这没有关系。为什么其他网站的响应中会多出这些数字?
我该如何解决?另外,我如何将HTTP响应头(header)与HTML正文本身分开?
我的代码如下:
#define _WIN32_WINNT 0x501
#include <iostream>
#include <winsock2.h>
#include <ws2tcpip.h>
#include <stdio.h>
#include <fstream>
#include <vector>
using namespace std;
// Sends "GET / HTTP/1.1" to WebPage on TCP port 80 and reads the complete
// reply (status line, headers and body) into a local string until the peer
// closes the connection or a socket error occurs.
//
// WebPage: host name or dotted IPv4 address to contact (e.g. "google.com").
//
// The function returns nothing and the collected reply is discarded when it
// returns; any failure (Winsock start-up, name resolution, connect, send)
// makes it return early after releasing whatever was acquired so far.
//
// NOTE: because the request is HTTP/1.1, a server may reply with
// "Transfer-Encoding: chunked". In that case the body part of Response is
// interleaved with hexadecimal chunk-size lines and ends with a "0" chunk.
// The headers end at the first "\r\n\r\n" in Response.
void Get(string WebPage)
{
    WSADATA wsaData;
    if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) return;

    // Ask the resolver for IPv4 TCP endpoints on port 80 directly, instead of
    // converting the address to text and back.
    struct addrinfo hints;
    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    struct addrinfo *result = NULL;
    if (getaddrinfo(WebPage.c_str(), "80", &hints, &result) != 0)
    {
        // On failure result is not valid and must not be dereferenced or freed.
        WSACleanup();
        return;
    }

    // Try each returned endpoint until one accepts the connection.
    SOCKET Socket = INVALID_SOCKET;
    for (struct addrinfo *candidate = result; candidate != NULL; candidate = candidate->ai_next)
    {
        Socket = socket(candidate->ai_family, candidate->ai_socktype, candidate->ai_protocol);
        if (Socket == INVALID_SOCKET) continue;
        if (connect(Socket, candidate->ai_addr, static_cast<int>(candidate->ai_addrlen)) != SOCKET_ERROR) break;
        closesocket(Socket);
        Socket = INVALID_SOCKET;
    }
    freeaddrinfo(result);
    if (Socket == INVALID_SOCKET)
    {
        WSACleanup();
        return;
    }

    std::string Header = "GET / HTTP/1.1\r\n";
    Header += "Host: " + WebPage + "\r\n";
    Header += "Connection: close\r\n";
    Header += "\r\n";

    // send() may accept fewer bytes than requested, so loop until the whole
    // request has been handed to the socket.
    const char *cursor = Header.c_str();
    int remaining = static_cast<int>(Header.size());
    bool requestSent = true;
    while (remaining > 0)
    {
        const int written = send(Socket, cursor, remaining, 0);
        if (written == SOCKET_ERROR)
        {
            requestSent = false;
            break;
        }
        cursor += written;
        remaining -= written;
    }

    std::string Response;
    if (requestSent)
    {
        // Nothing more will be sent on this connection.
        shutdown(Socket, SD_SEND);

        char Buffer[8192];
        for (;;)
        {
            const int bytes = recv(Socket, Buffer, sizeof(Buffer), 0);
            // 0 means the peer closed the connection; a negative value is
            // SOCKET_ERROR. Neither may be used as a length or an index.
            if (bytes <= 0) break;
            Response.append(Buffer, static_cast<std::string::size_type>(bytes));
        }
    }

    closesocket(Socket);
    WSACleanup();
}
// Program entry point: performs a single request against google.com via Get.
int main()
{
    const char *host = "google.com";
    Get(host);
    return 0;
}
答案 0 :(得分:3)
请参阅此Wiki页面:http://en.wikipedia.org/wiki/Chunked_transfer_encoding
每个十六进制数(块长度)后面都跟着指定大小的实际块数据(有效负载),随后紧接着是下一个块长度。如果块长度为零,则后面不再有任何数据字节(即数据结束)。这些元素之间由换行符(CRLF)分隔。我不确定您贴出的内容能否被正确拼接;看起来您还需要处理多个连续的换行符。您可以在浏览器中打开该页面并查看其源代码来进行对比。
编辑:
刚刚找到这个嗅探工具,它显示了我想知道的所有细节:
答案 1 :(得分:-1)
此函数会对您的HTTP数据进行“去分块”(unchunk)处理。它是用VB6写的,但您应该能看懂思路(确实是很老的代码)。
'UnChunk: removes HTTP "chunked" transfer encoding from a complete response.
'
'Indata  - the raw response text: status line and headers, a blank line,
'          then the body.
'Returns - the headers (including the terminating blank line) followed by
'          the concatenated chunk payloads, with the chunk-size lines removed.
'          Indata is returned unchanged when the headers do not announce
'          chunked encoding, when no header terminator is found, or when the
'          chunk framing is malformed or truncated.
'
'NOTE(review): chunk sizes count bytes; this assumes every character of
'Indata corresponds to exactly one byte of the response - confirm how the
'caller converts the received bytes to a String.
Private Function UnChunk(Indata As String) As String
    Dim headerEnd As Long
    Dim headerText As String
    Dim sizeStart As Long
    Dim sizeEnd As Long
    Dim sizeText As String
    Dim semicolonPos As Long
    Dim chunksize As Long
    Dim decoded As String

    'can't let this crash: any runtime error returns the input untouched
    On Error GoTo returnInData

    'the header section ends at the first blank line (CRLF CRLF)
    headerEnd = InStr(Indata, vbCrLf & vbCrLf)
    If headerEnd = 0 Then GoTo returnInData 'invalid http

    'only the headers decide whether the body is chunked; both markers must
    'be present, and text inside the body must not influence the decision
    headerText = LCase(Left(Indata, headerEnd - 1))
    If InStr(headerText, "transfer-encoding:") = 0 Or InStr(headerText, "chunked") = 0 Then
        GoTo returnInData
    End If

    'keep the headers together with the complete CRLF CRLF (4 characters)
    decoded = Left(Indata, headerEnd + 3)

    'first character of the first chunk-size line
    sizeStart = headerEnd + 4

    Do
        'the chunk-size line runs up to the next CRLF
        sizeEnd = InStr(sizeStart, Indata, vbCrLf)
        If sizeEnd = 0 Then GoTo returnInData 'truncated input

        sizeText = Mid(Indata, sizeStart, sizeEnd - sizeStart)

        'drop optional chunk extensions (";name=value") and padding
        semicolonPos = InStr(sizeText, ";")
        If semicolonPos > 0 Then sizeText = Left(sizeText, semicolonPos - 1)
        sizeText = Trim(sizeText)

        'the size must consist of hex digits only
        If Len(sizeText) = 0 Then GoTo returnInData
        If sizeText Like "*[!0-9A-Fa-f]*" Then GoTo returnInData

        'the trailing "&" forces a Long literal; without it values such as
        '"fff8" are read as a 16-bit Integer and come out negative
        chunksize = Val("&h" & sizeText & "&")
        If chunksize < 0 Then GoTo returnInData

        'by spec, a zero-length chunk means there is no more data
        If chunksize = 0 Then
            UnChunk = decoded
            Exit Function
        End If

        'the payload must be followed by CRLF, otherwise the framing is broken
        If Mid(Indata, sizeEnd + 2 + chunksize, 2) <> vbCrLf Then GoTo returnInData

        decoded = decoded & Mid(Indata, sizeEnd + 2, chunksize)

        'continue right after the payload and its trailing CRLF, so CRLFs
        'inside the payload are never mistaken for chunk-size lines
        sizeStart = sizeEnd + 2 + chunksize + 2
    Loop

    'failure or error: return the input data unchanged
returnInData:
    UnChunk = Indata
End Function