C - 来自套接字

时间:2016-02-29 20:54:04

标签: c linux sockets posix

我有一个用C语言编写的简单代理服务器程序。我遇到了一些数据不一致的问题。当我请求网站时,我将结果数据保存在服务器端的文件中,然后将其发送到客户端并将其保存在客户端。客户端和服务器上的结果将具有不同的大小,并且看起来某些HTML正在被复制。通常,保存在服务器上的文件将小于客户端上保存的文件,尽管两个文件仍然大于实际网页(即,如果我右键单击页面并“另存为”,则生成的页面小于我的代码返回的那些)。我已经尝试了各种方法来解决这个问题,似乎没有任何工作。结果甚至在同一网站的尝试之间似乎有所不同。例如,我可以两次请求同一个网站,但文件大小与两次尝试都不同。在非常罕见的情况下,特别是在小型网站上,客户端和服务器程序都会返回正确的网页,并且这两个文件都具有适当的大小。

注意:我知道代码仍然很混乱。在我继续前进之前,我更担心解决这个问题。我将在纠正此问题后解决问题(例如检查套接字是否无法打开),因此请仅解决我已概述的问题。

Server.c

#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <regex.h>
#include <time.h>

/*Code used to resolve URL into IP address adapted from following URL:
http://www.binarytides.com/hostname-to-ip-address-c-sockets-linux/
*/

//This code has been adapted from the server code provided in class

int main(int argc, char** argv)
{
    char str[655360];
    char recvline[655360];
    char parsedRecv[655360];
    char domain[1025];
    char directory[1025];
    char absoluteURL[1025];
    char temp[1025];


    int listen_fd, conn_fd, n, tempCount;
    struct sockaddr_in servaddr;
    int bytesRead;
    int stringCounter;

    int port;

    FILE *fp;

    //Variables used for second socket and resolving of host
    char ip[100];
    int sockfd, secondSocketCount;
    int len = sizeof(struct sockaddr);
    struct sockaddr_in secondServaddr;
    struct addrinfo *servinfo, *p;
    struct sockaddr_in *h;
    int rv;
    char simpleMatch[10];
    int flag = 0;
    //End

    //Used for HTTP GET request
    char request[2049];

    listen_fd = socket(AF_INET, SOCK_STREAM, 0);

    bzero(&servaddr, sizeof(servaddr));

    servaddr.sin_family = AF_INET;
    servaddr.sin_addr.s_addr = htons(INADDR_ANY);
    if(argc < 2)
    {
        printf("Error! Enter a port number to run this server on.\n\tEx: ./server 22000\n\r\0");
        return 0;
    }
    else
    {
        port = atoi(argv[1]);
    }

    servaddr.sin_port = htons(port);

    printf("\n");
    printf("Awaiting connections...\n");

    bind(listen_fd, (struct sockaddr*)&servaddr, sizeof(servaddr));
    listen(listen_fd, 10);

    //Once the server is listening, enter an infinite loop to keep listening
    while(1)
    {
        conn_fd = accept(listen_fd, (struct sockaddr*) NULL, NULL);
        bytesRead = read(conn_fd, recvline, sizeof(recvline));
        if(bytesRead > 0) //data read
        {
            recvline[bytesRead] = '\0';

            bzero(absoluteURL, 1025);
            strcpy(absoluteURL, recvline);

            //Extract host and page from recvline
            //For loop used to check if URL begins with HTTP or HTTPS
            for(stringCounter = 0; stringCounter < 5; stringCounter++)
            {
                simpleMatch[stringCounter] = tolower(recvline[stringCounter]);
            }
            simpleMatch[strlen(simpleMatch)] = '\0';
            if(strcmp("http:", simpleMatch) == 0)
            {
                for(stringCounter = 7, tempCount = 0; stringCounter < strlen(recvline); stringCounter++, tempCount++)
                {
                    temp[tempCount] = recvline[stringCounter];
                }
                temp[strlen(temp)] = '\0';
                strcpy(recvline, temp);
            }
            else if(strcmp("https", simpleMatch) == 0)
            {
                for(stringCounter = 8, tempCount = 0; stringCounter < strlen(recvline); stringCounter++, tempCount++)
                {
                    temp[tempCount] = recvline[stringCounter];
                }
                temp[strlen(temp)] = '\0';
                strcpy(recvline, temp);
            }

            //printf("\n\nAfter stripping HTTP, we are left with: %s\n\n", recvline);

            //Now that HTTP:// or HTTPS:// has been stripped, can parse for domain
            for(stringCounter = 0, tempCount = 0; stringCounter < strlen(recvline); stringCounter++)
            {
                //moving domain into the domain string
                if(flag == 0)
                {
                    if(recvline[stringCounter] != '/')
                    {
                        domain[stringCounter] = recvline[stringCounter];
                    }
                    else
                    {
                        domain[stringCounter + 1] = '\0';
                        //directory[tempCount] = recvline[stringCounter];
                        flag = 1;
                        tempCount++;
                    }
                }
                else
                {
                    directory[tempCount] = recvline[stringCounter];
                    tempCount++;
                }
            }

            //printf("\n\nDirectory is: %s\n\n", directory);

            //reset flag and append '\0' to directory and domain
            flag = 0;
            if(tempCount < 1025)
            {
                directory[tempCount] = '\0';
            }
            //directory[strlen(directory)] = '\0';
            //domain[strlen(domain)] = '\0';

            //Done extracting

            //Resolve hostname to IP

            if((rv = getaddrinfo(domain, NULL, NULL, &servinfo)) != 0)
            {
                printf("Error: an IP address cannot be resolved for %s\n", domain);
                return 0;
                //fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rv));
            }
            else
            {
                for(p = servinfo; p != NULL; p = p->ai_next)
                {
                    h = (struct sockaddr_in *) p->ai_addr;
                    strcpy(ip, inet_ntoa(h->sin_addr));
                }

                freeaddrinfo (servinfo);

                printf("%s resolved to: %s\n", domain, ip);
            }
            //End Resolve

            //Now that the IP is resolved, open a socket and connect to the IP

            //Open socket
            sockfd = socket(AF_INET, SOCK_STREAM, 0);
            bzero(&secondServaddr, sizeof(secondServaddr));

            secondServaddr.sin_family = AF_INET;
            secondServaddr.sin_port = htons(80);

            inet_pton(AF_INET, ip, &(secondServaddr.sin_addr)); //IP taken from earlier resolution
            connect(sockfd, (struct sockaddr*) &secondServaddr, sizeof(secondServaddr));

            //socket is open, can create and send request, finally
            bzero(request, 2049);
            //sprintf(request, "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n", directory, domain);
            //sprintf(request, "GET %s HTTP/1.1\r\n\r\n", absoluteURL);
            //sprintf(request, "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n", absoluteURL, domain);
            sprintf(request, "GET /%s HTTP/1.1\r\nHost: %s\r\n\r\n", directory, domain);

            write(sockfd, request, strlen(request));

            printf("\tAttempting to retrieve data: this may be slow.\n");

            bzero(recvline, 655360);
            bzero(parsedRecv, 655360);

            //Old method used to retrieve data
            //This was changed when I began to run into issues
            /*while(1)
            {
                secondSocketCount = read(sockfd, parsedRecv, sizeof(parsedRecv));
                if(secondSocketCount == -1)
                {
                    printf("Error receiving data: server terminating.\n");
                    return 0;
                }
                else if(secondSocketCount == 0)
                {
                    //no more data
                    break;
                }
                strcat(recvline, parsedRecv);
            }*/

            //This while loop is used to read in data (the response from the server)
            bzero(str, 655360);
            while(secondSocketCount = read(sockfd, recvline, sizeof(recvline)) > 0)
            {
                strcat(str, recvline);
            }
            //bzero(parsedRecv, 655360);
            //recvline[strlen(recvline)] = '\0';

            printf("\tData retrieved from main server.\n");

            //This for loop finds the end of the HTTP header and copies everything after into parsedRecv
            for(stringCounter = 0, tempCount = 0; stringCounter < strlen(str); stringCounter++)
                {
                    //lazy if statement to find two \r\n in a row to mark the end of the header
                    if(str[stringCounter] == '\r' && str[stringCounter + 1] == '\n' && str[stringCounter + 2] == '\r' && str[stringCounter + 3] == '\n' && flag == 0)
                    {
                        flag = 1;
                        stringCounter += 3;
                    }
                    if(flag == 1)
                    {
                        parsedRecv[tempCount] = str[stringCounter];
                        tempCount++;
                    }
                }
            flag = 0;
            parsedRecv[strlen(parsedRecv)] = '\0';
            fp = fopen("ReturnedPageServer.html", "w");
            if(fp != NULL)
            {
                fprintf(fp, "%s", parsedRecv);
                //fprintf(fp, "%s", recvline);
            }
            fclose(fp);
            printf("\tData saved to ReturnedPageServer.html\n");

        }

        //strcpy(str, "This is a test of the Hello World Broadcast System.\n");
        bzero(str, 655360);
        strcpy(str, parsedRecv);
        write(conn_fd, str, strlen(str));
        close(conn_fd);
        printf("\tData sent to client.\n");
        printf("Awaiting further connections...\n");

        //strcpy(directory, "");
        //strcpy(domain, "");
        //strcpy(recvline, "");
        bzero(directory, 1025);
        bzero(domain, 1025);
        bzero(temp, 1025);
        bzero(recvline, 655360);
    }
    return 0;
}

Client.c

#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//Code adapted from client code provided in class

int main(int argc, char** argv)
{
    int sockfd, n, port;
    int len = sizeof(struct sockaddr);
    char sendline[10000];
    char recvline[655360];
    struct sockaddr_in servaddr;

    FILE *fp;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    bzero(&servaddr, sizeof(servaddr));

    servaddr.sin_family = AF_INET;
    if(argc < 2)
    {
        printf("Error! Enter the port number for the server.\n\tEx: ./client 22000\n\r\0");
        return 0;
    }
    else
    {
        port = atoi(argv[1]);
    }
    servaddr.sin_port = htons(port);

    inet_pton(AF_INET, "129.120.151.94", &(servaddr.sin_addr)); //CSE01 IP
    connect(sockfd, (struct sockaddr*) &servaddr, sizeof(servaddr));

    printf("url: ");
    scanf("%s", sendline);

    //strcpy(sendline, "The server should display this text.\n\0");
    //printf("\nLength of string: %d\n", strlen(sendline));
    //printf("\t%s\n", sendline);
    write(sockfd, sendline, strlen(sendline));

    fp = fopen("ReturnedPageClient.html", "w");
    bzero(recvline, 655360);
    while(n = read(sockfd, recvline, sizeof(recvline)) > 0)
    {
        //printf("%s", recvline);
        if(fp != NULL)
        {
            fprintf(fp, "%s", recvline);
        }
        else
        {
            printf("\tError saving file: client terminating.\n");
            fclose(fp);
            return 0;
        }

    }
    fclose(fp);
    printf("\tResponse received from proxy server.\n\tFile saved as \"ReturnedPageClient.html\"\n");
    close(sockfd);
    return 0;
}

2 个答案:

答案 0 :(得分:2)

一个问题 - 不确定它的'问题' - 是否期望TCP是面向消息的。即你发送一个500字节的消息,并期望接收一个500字节的块。这不是TCP的工作原理。 TCP是面向流的,500字节发送可以接收为服务器上的250个2字节读取,或1 500字节或100,50,100,2,2,2,2,2,240。您必须在服务器上循环直到你收到所有'消息'。这将导致不一致的行为,特别是在本地工作但不在“真正的”网络上工作

这反过来又提出了一个问题,即你如何知道自己收到了一条完整的“信息”。您需要一些允许消息框架的更高级别协议(比如发送固定大小的长度,然后是正文)

答案 1 :(得分:0)

要使用套接字对客户端和服务器之间的通信进行排序,您需要某种协议,以便连接中的每一方都可以确定消息何时完成。通过TCP套接字传输的消息可以以与发送方写入的大小不同的块接收。

您不会在程序中考虑到这一点,您认为成功的read请求会返回发送方write请求所写的数据。这是不正确的:您应该继续从套接字读取并将读取的块存储到请求缓冲区中,直到有完整请求或套接字关闭。

一个简单的协议是一次写一行。使用\n个字符对请求和答案进行排序。这或多或少是HTTP,SMTP和POP协议的作用。

代码很多问题:

  • 它有语法错误阻止编译。

  • 您将非常大的缓冲区定义为自动变量,超过2MB,您可能会在某些系统上出现堆栈溢出。

  • 由于您不传输'\0'终结符,因此读入缓冲区的数据应为空终止。您似乎意识到了这个问题,但您的方法parsedRecv[strlen(parsedRecv)] = '\0';没有做任何有用的事情。根据定义,strlen(parsedRecv)'\0'字节的偏移量。如果字符串未正确地终止,则strlen将扫描超出缓冲区的末尾并调用未定义的行为。成功'\0'后,您应该手动设置recvline[n] = '\0';字节与read

  • 您尝试将字符串片段与strcmp匹配:这不是最有效的,因为您需要先将片段复制到单独的缓冲区并将其终止,以便strcmp可以比较那是一个完整的字符串。请使用memcmp代替指定的片段长度。