从get请求中读取站点主体

时间:2016-01-19 17:18:44

标签: c++ http get request wikipedia

我尝试从get请求读取网站主体,但我只获取状态代码和一些从html文本开始(这是我得到的 - " HTTP / 1.1 200 OK服务器:Apa!DOCTYPE html html lang = E&#34)。如果你能帮助我解决问题,我将不胜感激。感谢

代码 -

#include <winsock2.h>
#include <WS2tcpip.h>
#include <windows.h>
#include <iostream>
#include <vector>
#include <ostream>

int main(){

// Initialize Dependencies to the Windows Socket.
WSADATA wsaData;
if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
    cout << "WSAStartup failed.\n";
    system("pause");
}

struct addrinfo hints;
ZeroMemory(&hints, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_protocol = IPPROTO_TCP;
hints.ai_socktype = SOCK_STREAM;

/* connect and download the article */
static const char wiki_host[] = "en.wikipedia.org";
struct addrinfo* targetAdressInfo = NULL;
DWORD getAddrRes = getaddrinfo(wiki_host, NULL, &hints, &targetAdressInfo);
if (getAddrRes != 0 || targetAdressInfo == NULL)
{
    cout << "Could not resolve the Host Name" << endl;
    system("pause");
    WSACleanup();
    return -1;
}

SOCKADDR_IN sockAddr;
sockAddr.sin_addr = ((struct sockaddr_in*) targetAdressInfo->ai_addr)->sin_addr;
sockAddr.sin_family = AF_INET;
sockAddr.sin_port = htons(80);

freeaddrinfo(targetAdressInfo);

SOCKET webSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (webSocket == INVALID_SOCKET)
{
    cout << "Creation of the Socket Failed" << endl;
    system("pause");
    WSACleanup();
    return -1;
}

if (connect(webSocket, (SOCKADDR*)&sockAddr, sizeof(sockAddr)) != 0)
{
    cout << "Could not connect";
    system("pause");
    closesocket(webSocket);
    WSACleanup();
    return -1;
}

string http_query = "GET / https://en.wikipedia.org/w/api.php?titles=StackOverflow&action=query&prop=extracts&format=json\r\nConnection: close\r\n\r\n";
if (send(webSocket, http_query.c_str(), http_query.length(), 0) == -1) {
    cout << "Could not send the request to the Server" << endl;
    system("pause");
    closesocket(webSocket);
    WSACleanup();
    return -1;
}

/* prepare to fetch the wiki article */

string response = "";

while (true) {
    static char recv_buffer[4096];
    const int bytes_read = recv(webSocket, recv_buffer, sizeof(recv_buffer) - 1, 0);
    if (!bytes_read) {
        break;
    }
    if (bytes_read == -1) {
        closesocket(webSocket);
        WSACleanup();
    }
    recv_buffer[bytes_read] = '\0';
    response += recv_buffer;
};

/* finished with the socket */

closesocket(webSocket);
WSACleanup();

/* parse the http response headers */

size_t cursor = 0;
string response_content;
vector<std::string> response_headers;
const size_t headers_end = response.find("\r\n\r\n");

while (true) {
    const size_t line_end = response.find("\r\n", cursor);
    if (line_end == std::string::npos) { /* probably due to http error */
        break;
    }
    response_headers.push_back(response.substr(cursor, line_end - cursor));
    if (line_end == headers_end) { /* found content */
        response_content = response.substr(headers_end + 4); /* skip \r\n\r\n */
        break;
    }
    cursor = line_end + 2; /* skip \r\n */
}


// print the respone 
for (int i = 0; i < sizeof(response); i++){
    cout << response[i];
}

//print response_content 
for (int i = 0; i < sizeof(response); i++){
cout << response_content[i];
}

system("pause");
return 0;
}

1 个答案:

答案 0 :(得分:1)

这不起作用:

for (int i = 0; i < sizeof(response); i++){ cout << response[i]; }

因为sizeof(response)是字符串对象的大小,不是字符串的长度。你应该简单地做

cout << response;

如果您真的想要遍历字符串(速度较慢且不推荐),则必须使用response.size()代替sizeof(response)

您的代码还有其他一些问题,但这应该可以解决手头的问题。