C++:获取网站源代码(第 2 部分)

时间:2010-05-03 22:20:56

标签: c++

我有下面这段源代码:

#include <winsock2.h>
#include <windows.h>
#include <iostream>
#pragma comment(lib,"ws2_32.lib")

using namespace std;

int main (){
        WSADATA wsaData;

    if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) {
                cout << "WSAStartup failed.\n";
        system("pause");
                return 1;
    }

        SOCKET Socket=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);

        struct hostent *host;
        host = gethostbyname("www.newegg.com");

        SOCKADDR_IN SockAddr;
        SockAddr.sin_port=htons(80);
        SockAddr.sin_family=AF_INET;
        SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);

        cout << "Connecting...\n";
        if(connect(Socket,(SOCKADDR*)(&SockAddr),sizeof(SockAddr)) != 0){
                cout << "Could not connect";
                system("pause");
                return 1;
        }
        cout << "Connected.\n";

        send(Socket,"GET / HTTP/1.1\r\nHost: www.newegg.com\r\nConnection: close\r\n\r\n", strlen("GET / HTTP/1.1\r\nHost: www.newegg.com\r\nConnection: close\r\n\r\n"),0);
        char buffer[10000];

        int nDataLength;
        while ((nDataLength = recv(Socket,buffer,10000,0)) > 0){               
                int i = 0;
                while (buffer[i] || buffer[i] == '\n' || buffer[i] == '\r') {
                        cout << buffer[i];
                        i += 1;
                }
        }

        closesocket(Socket);
        WSACleanup();

        system("pause");
        return 0;
}

现在它可以获取到网页源代码,但是输出的内容会不断重复。例如:

http://img263.imageshack.us/img263/7259/outputq.png

注意到了吗?它输出了页面结尾(</html>)之后还在继续输出。我想知道如何避免这种情况。我知道可以限制缓冲区的大小,但是有没有更好的解决方案?谢谢。

1 个答案:

答案 0 :(得分:1)

尝试把下面的第一段循环(while)改成其后的第二段循环(for):

    // Loop as written in the question (the buggy version).
    // It stops only when it meets a zero byte: the '\n' and '\r' comparisons
    // are redundant because both characters are already non-zero. It never
    // consults nDataLength, so it keeps printing past the bytes the last
    // recv call actually wrote into buffer.
    while (buffer[i] || buffer[i] == '\n' || buffer[i] == '\r') {
            cout << buffer[i];
            i += 1;
    }

    // Replacement loop: prints exactly nDataLength bytes, i.e. only what
    // recv reported as received. It has no init clause, so it relies on
    // `int i = 0;` being declared immediately before it, as in the question.
    for(; i != nDataLength; ++i)
            cout << buffer[i];

看起来你读过了缓冲区中有效数据的末尾,读到了 recv 及相关函数之前留下的垃圾数据。没有任何规定要求 recv 返回的数据必须以空字符('\0')结尾,而你的 while 循环恰恰依赖这一假设才能正常工作。