我正在尝试从维基百科下载数据。我发送了一个GET请求,但返回只包含页面状态和一些HTML详细信息。
我做错了什么?
#include <winsock2.h>
#include <WS2tcpip.h>
#include <windows.h>
#include <iostream>
int main(){
WSADATA wsaData;
if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
cout << "WSAStartup failed.\n";
system("pause");
return -1;
}
struct addrinfo hints;
ZeroMemory(&hints, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_protocol = IPPROTO_TCP;
hints.ai_socktype = SOCK_STREAM;
struct addrinfo* targetAdressInfo = NULL;
DWORD getAddrRes = getaddrinfo("www.wikipedia.org", NULL, &hints, &targetAdressInfo);
if (getAddrRes != 0 || targetAdressInfo == NULL)
{
cout << "Could not resolve the Host Name" << endl;
system("pause");
WSACleanup();
return -1;
}
SOCKADDR_IN sockAddr;
sockAddr.sin_addr = ((struct sockaddr_in*) targetAdressInfo->ai_addr)->sin_addr;
sockAddr.sin_family = AF_INET;
sockAddr.sin_port = htons(80);
freeaddrinfo(targetAdressInfo);
SOCKET webSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (webSocket == INVALID_SOCKET)
{
cout << "Creation of the Socket Failed" << endl;
system("pause");
WSACleanup();
return -1;
}
if (connect(webSocket, (SOCKADDR*)&sockAddr, sizeof(sockAddr)) != 0)
{
cout << "Could not connect";
system("pause");
closesocket(webSocket);
WSACleanup();
return -1;
}
// Sending a HTTP-GET-Request to the Web Server
const char* httpRequest = "GET / http://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&exintro=&titles=Google HTTP/1.1\r\n\r\n";
int sentBytes = send(webSocket, httpRequest, strlen(httpRequest), 0);
if (sentBytes < strlen(httpRequest) || sentBytes == SOCKET_ERROR)
{
cout << "Could not send the request to the Server" << endl;
system("pause");
closesocket(webSocket);
WSACleanup();
return -1;
}
char buffer[1000000];
ZeroMemory(buffer, sizeof(buffer));
int dataLen;
while ((dataLen = recv(webSocket, buffer, sizeof(buffer), 0) > 0))
{
int i = 0;
while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') {
cout << buffer[i];
i += 1;
}
}
closesocket(webSocket);
WSACleanup();
system("pause");
return 0;
}
答案 0 :(得分:0)
答案 1 :(得分:0)
如何使用python脚本?
import urllib2
def get_page(url):
request = urllib2.Request(url)
request = urllib2.urlopen(request)
data = request.read()
return data
url = "http://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&exintro=&titles=Google HTTP/1.1\r\n\r\n"
print get_page(url)