通过代理使用套接字发出HTTP请求

时间:2015-12-17 16:26:02

标签: c++ sockets

我想使用套接字发出HTTP请求。到目前为止,这是我的代码:

   #include "stdafx.h"
#ifndef UNICODE
#define UNICODE
#endif

#define _WINSOCK_DEPRECATED_NO_WARNINGS
#define WIN32_LEAN_AND_MEAN

#include <winsock2.h>
#include <ws2tcpip.h>
#include <stdio.h>
#include <iostream>

#pragma comment(lib, "ws2_32.lib")
using namespace std;

// Question's attempt at fetching a page through an HTTP proxy with Winsock.
// As written it starts Winsock, creates a TCP socket, binds that socket to the
// proxy address, then pauses and returns. Everything after the first
// `return 0;` is never executed.
int main()
{
    try {
        WSADATA wsaData;
        // NOTE(review): iResult is assigned here but not inspected before the
        // socket is created.
        int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData);
        sockaddr_in clientService;


        // NOTE(review): the returned handle is used below without being checked.
        SOCKET Socket = socket(AF_INET, SOCK_STREAM, 0);

        // clientService is filled with the proxy's address and port.
        memset(&clientService, 0, sizeof(clientService));
        clientService.sin_addr.s_addr = inet_addr("83.233.53.59"); // Proxy IP
        clientService.sin_family = AF_INET;
        clientService.sin_port = htons(10200);

        // NOTE(review): this binds the local socket to the *proxy's* address.
        // The same address is passed to connect() further down, so the bind
        // looks unnecessary - confirm and remove.
        if (bind(Socket, (struct sockaddr *) &clientService, sizeof(clientService)) < 0) {
            perror("bind");
            exit(1);
        }
        // NOTE(review): this pause followed by an unconditional return makes
        // every statement below unreachable.
        system("pause");
        return 0;
        struct hostent *host;
        // NOTE(review): host is dereferenced below without a null check.
        host = gethostbyname("www.google.com");
        // SockAddr is filled in here but never passed to connect(); the
        // connect() call below uses clientService instead.
        SOCKADDR_IN SockAddr;
        SockAddr.sin_port = htons(80);
        SockAddr.sin_family = AF_INET;
        // SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);
        // NOTE(review): memcpy's first argument is the destination, so this
        // copies the still-unset SockAddr.sin_addr into host->h_addr, which is
        // the opposite direction from the commented-out assignment above.
        memcpy(host->h_addr, &(SockAddr.sin_addr.s_addr), host->h_length);
        std::cout << "Connecting...\n";

        // Connect to the proxy address stored in clientService.
        iResult = connect(Socket, (SOCKADDR *)& clientService, sizeof(clientService));
        if (iResult != 0) {
            std::cout << "Could not connect";
            getchar();
            return 1;
        }
        std::cout << "Connected.\n";
        // NOTE(review): the request line contains spaces inside "HTTP / 1.1"
        // and uses a relative target ("/") although the peer is a proxy.
        // The return value of send() is ignored.
        send(Socket, "GET / HTTP / 1.1\r\nHost: www.google.com\r\nConnection: close\r\n\r\n", strlen("GET / HTTP / 1.1\r\nHost: www.google.com\r\nConnection: close\r\n\r\n"), 0);
        char buffer[10000];
        int nDataLength;
        // Read until recv() returns 0 or a negative value.
        while ((nDataLength = recv(Socket, buffer, 10000, 0)) > 0) {
            int i = 0;
            // Print bytes until one is neither >= 32 nor CR/LF.
            // NOTE(review): i is never compared with nDataLength, so this scan
            // can continue past the bytes recv() just wrote and past the end
            // of buffer.
            while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') {
                std::cout << buffer[i];
                i += 1;
            }
        }
        iResult = closesocket(Socket);
        WSACleanup();

        system("pause");
    }
    catch (...) {
        // Any exception is swallowed; the program only pauses.
        system("pause");

    }
    return 0;
}

但它无法正常工作:程序没有给出网页的HTML源代码就自行关闭了。哪里出了问题?

我该如何解决?

3 个答案:

答案 0 :(得分:0)

下面的代码在我的机器上可以运行,不过我用的不是Windows,而是基于FreeBSD的系统(OS X)。我没能解决gethostbyname的问题,也不清楚该怎么处理,但这段代码可以运行,能够连接并从谷歌下载代码。

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define _WINSOCK_DEPRECATED_NO_WARNINGS
#define WIN32_LEAN_AND_MEAN

//#include <winsock2.h>
//#include <ws2tcpip.h>
#include <stdio.h>
#include <iostream>

#pragma comment(lib, "ws2_32.lib")
using namespace std;

/// Connects to the HTTP proxy at 83.233.53.59:80 over a plain TCP socket,
/// asks it for http://www.google.com/ and echoes the response to stdout.
///
/// @return 0 when the exchange completed, 1 when the socket could not be
///         created, the connection failed, or the request could not be sent.
int main()
{
    // A request sent to a proxy must carry the absolute URL of the resource,
    // and the protocol version is written "HTTP/1.1" with no spaces.
    static const char kRequest[] =
        "GET http://www.google.com/ HTTP/1.1\r\n"
        "Host: www.google.com\r\n"
        "Connection: close\r\n"
        "\r\n";

    const int Socket = socket(AF_INET, SOCK_STREAM, 0);
    if (Socket < 0) {
        perror("socket");
        return 1;
    }

    // No bind() is needed: connect() picks the local address and port itself.
    sockaddr_in SockAddr;
    memset(&SockAddr, 0, sizeof(SockAddr));
    SockAddr.sin_family = AF_INET;
    SockAddr.sin_port = htons(80);
    SockAddr.sin_addr.s_addr = inet_addr("83.233.53.59"); // Proxy IP

    std::cout << "Connecting...\n";
    if (connect(Socket, reinterpret_cast<const sockaddr *>(&SockAddr), sizeof(SockAddr)) != 0) {
        std::cout << "Could not connect";
        close(Socket); // do not leak the descriptor on the failure path
        getchar();
        return 1;
    }
    std::cout << "Connected.\n";

    // sizeof - 1 drops the terminating NUL so only the request text is sent.
    if (send(Socket, kRequest, sizeof(kRequest) - 1, 0) < 0) {
        perror("send");
        close(Socket);
        return 1;
    }

    char buffer[10000];
    ssize_t nDataLength;
    while ((nDataLength = recv(Socket, buffer, sizeof(buffer), 0)) > 0) {
        // Only the first nDataLength bytes are valid: recv() does not
        // NUL-terminate and leaves the rest of the buffer untouched.
        for (ssize_t i = 0; i < nDataLength; ++i) {
            const char c = buffer[i];
            const bool printable = (c >= 32 || c == '\n' || c == '\r');
            if (!printable) {
                // Same policy as before: stop echoing this chunk at the first
                // byte that is neither printable nor CR/LF.
                break;
            }
            std::cout << c;
        }
    }
    close(Socket);

    // Carried over from the Windows original; under a POSIX shell this only
    // reports that "pause" is not a known command.
    system("pause");
    return 0;
}

它在HTTP层面出现了身份验证失败,不过输出如下:

Connecting...
Connected.
HTTP/1.0 401 Unauthorized
Server: uhttpd/1.0.0
Date: Thu, 17 Dec 2015 18:29:04 GMT
WWW-Authenticate: Basic realm="                 "
Content-Type: text/html; charset="UTF-8"
Connection: close

<HTML><HEAD><META http-equiv='Pragma' content='no-cache'><META http-equiv='Cache-Control' content='no-cache'><TITLE> 401 Authorization</TITLE>
<script language=javascript type=text/javascript>
function cancelevent()
{
sh: pause: command not found
Program ended with exit code: 0

答案 1 :(得分:0)

可能出错的一点是,在以下循环中访问超出范围的数组索引时,您的应用程序可能会崩溃:

while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') {
    ...
    i += 1;
}

您当前的代码无法保证buffer包含一个会使您的循环终止的字节,因此它可能会无限期地继续。您必须通过在访问数组索引之前进行额外检查来防止这种情况。考虑到nDataLength将始终小于或等于sizeof(buffer),请尝试以下操作:

// Bounded form of the print loop: i is compared with nDataLength (the count
// returned by recv) before buffer[i] is read, so the scan never leaves the
// bytes that were actually received.
while (i < nDataLength &&
    (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r'))
{
    // Do your printing.
    i++;
}

或者更简单:

// Variant that checks the bound after advancing i. The first pass reads
// buffer[i] before any comparison with nDataLength, so this form relies on
// at least one byte having been received - presumably it is meant to sit
// inside the `recv(...) > 0` loop with i starting at 0.
while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r')
{
    // Do your printing.
    i++;

    // Stop once every received byte has been examined.
    if (i >= nDataLength)
        break; // Exit the loop.
}

答案 2 :(得分:0)

system("pause");
return 0;

这两行代码使其后的所有语句都不会被执行。请删除它们。

您的代码还有许多其他问题。例如,您把套接字绑定(bind)到了代理的地址,这没有意义,请去掉。您接下来就要对该套接字调用connect,根本不需要先绑定它。

然后您向代理发送无效的GET请求。在这种情况下,GET请求应包含完整的URL,而不仅仅是相对URL。

在查找空格等字符时,您的读取越过了接收缓冲区中的有效数据。您需要用recv()返回的字节数来限定这一查找的范围。

等等。