从返回的邮件中提取特定号码

时间:2014-04-08 18:13:12

标签: c parsing

我正在使用Linux中的GET函数编写一个程序来模拟一个非常基本的Internet Explorer。我似乎唯一的问题是,我不确定如何从返回值中提取内容长度的值。现在我有硬编码的数字,但它们只适用于我的文件大小,而不是更大或更小的数字。如果我能设法解析内容长度,我可以解决这个问题。如果有人能让我走上解决问题的正确道路,我将非常感激。

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>


/*  VSIE client program uses TCP protocol to connect to the remote http server.
    The program will take 2 input arguments:
        1) command option, get (receive) or head (send)
        2) http URL address 
*/

#define MAX 80
#define MAX2 1024
#define http "HTTP/1.1"
#define TRUE   1
#define FALSE  0
#define HEADERSTOP "\n\n"
main(int argc, char *argv[])
{   
    unsigned char *e;
    char command[MAX];
    char server[MAX];
    char path[MAX];
    char filename[MAX]= "";
    char httpString[MAX];
    int i, x, f, n, length = 0;
    int numBytes = 0;
    int getData = TRUE;
    int getFlag = FALSE;
    int flag = FALSE;
    int headFlag = FALSE;
    FILE *in;

    int sk;
    unsigned char buf[MAX2];
    struct sockaddr_in remote;
    struct hostent *hp;
    struct servent *sp;
    short port = 0;


    // parse input arguments
    sscanf(argv[2],"%[^'/']%s",server,path);

    if (strcmp(argv[1],"-get") == 0)
    {
        sprintf(command, "GET");
        getFlag = TRUE;
    }
    else if (strcmp(argv[1],"-head") == 0)
    {
        sprintf(command, "HEAD");
    }

    //build http 1.1 GET or HEAD message
    sprintf(httpString,"%s %s %s\nHost: %s\n\n", command, path,http,server);

    printf("command = %s, server = %s, path = %s\n", command, server, path);
    printf("httpString = %s\n",httpString);

    //parse filename from path
    length = strlen(path);
    x=0;
    f=0;

    for(i = 0; i < length; i++)
    {
        //printf("path[%d] = %c \n",i,path[i]);
        if ((flag == TRUE) & (f == 2))
        {
            filename[x] = path[i];
            x++;
        }

        if (path[i] == '/')
        {
            flag = TRUE;
            f++;
        }

    }

    printf("filename = %s\n", filename);

    //if command = get, open filename   
    //if(command == "-get") 
    if (getFlag == TRUE)
    {
        if((in = fopen (filename,"w")) == NULL)
        {
            //printf("FAILURE: opening input file %s\n",filename);
            perror("FAILURE: opening input file");
            exit(1);
        }
        printf("file opened successfully\n");
    }

    //get internet address of host & port number of http service
    hp = gethostbyname(server);
    if (hp == NULL) 
    {
        printf("Can't find host name. %s\n", server);
        exit (1);
    }

    //copy the h_addr (source) to s_add (destination) for n bytes specified by length
    bcopy(hp->h_addr,&remote.sin_addr.s_addr,hp->h_length);

    /* get the port number */
    sp = getservbyname("http", "tcp");
    if (sp == NULL)
    {
        printf("can't find port # %d\n",sp->s_port);
        exit (1);
    }
    port = sp->s_port;
    remote.sin_port = sp->s_port;
    printf("port = %d, port = %d \n", port, remote.sin_port);

    //create socket for http server - socket type: Sock_Stream, protocol: TCP
    sk = socket(AF_INET,SOCK_STREAM,0);
    if (sk < 0)
    {
        perror("error opening socket");
        exit(1);
    }
    remote.sin_family = AF_INET;    

    //initiate connection to the server address w/ TCP socket
    if (connect(sk, (struct sockaddr *) &remote, sizeof(remote)) < 0) 
    {
        printf("connect fails!\n");
        exit(1);
    }
    printf("connection successful\n");

    //send http message
    printf("send message:%s\n", httpString);
    //send(sk,httpString,strlen(httpString)+1,0);
    if(send(sk,httpString,sizeof(httpString),0) < 0)
    {
        printf("send() failed");
        //exit(1);
    }

    n = 1;
    //Loop until all data received
    while(getData == TRUE)
    {   
        //wait for and print the return message
        numBytes = recv(sk,buf,sizeof(buf),0);
        if (numBytes < 0)
        {
            perror("error reading from socket");
            break;
        }
        else if (numBytes < MAX2)
        {
            getData = FALSE;
            printf("***end while loop****\n");
        }
        if (headFlag == FALSE){
            e = memchr(buf, '\n', sizeof(buf)); 

            while (*(e+1) != '\r'){
                e = memchr(e+1, '\n', sizeof(buf));
            }
            headFlag = TRUE;
        }

        printf("\n****number of bytes received %d****\n",numBytes); 

        //saved the retrieved content into the file (input argument)

        if (getFlag == TRUE)
        {
            //printf("write output\n");
            printf("%.*s\n", (numBytes-763), buf);
            if(e != NULL){
                fwrite(e, numBytes, 1, in);
                e = NULL;
            }else{
                fwrite(buf, numBytes, 1, in);
            }

        }
        n++;
    } // end while()

    //close socket & file
    close(sk);

    if(fclose(in) !=0)
    {
        perror("FAILURE: Closing input file");
        exit(1);
    }

    return 0;
} //end main()

返回的信息是:

****number of bytes received 1024****
HTTP/1.1 200 OK

Date: Tue, 08 Apr 2014 17:37:10 GMT

Server: Apache/2.2.22 (Ubuntu)

Last-Modified: Tue, 18 Feb 2014 19:41:06 GMT

ETag: "1724117-9fb-4f2b373fef880"

Accept-Ranges: bytes

Content-Length: 2555

Vary: Accept-Encoding

Content-Type: text/html

1 个答案:

答案 0 :(得分:2)

以下是一些代码,向您展示了一种方法。对你来说重要的是

strtok - split the string into lines
strstr - find a string that contains the words you are looking for
sscanf - scan the line for the value of the integer

其他所有东西都是为了让这个例子有效。

#include <stdio.h>
#include <string.h>

int main(void) {
char httpString[]="HTTP/1.1 200 OK\n"\
"Date: Tue, 08 Apr 2014 17:37:10 GMT\n"\
"Server: Apache/2.2.22 (Ubuntu)\n"\
"Last-Modified: Tue, 18 Feb 2014 19:41:06 GMT\n"\
"ETag: \"1724117-9fb-4f2b373fef880\"\n"\
"Accept-Ranges: bytes\n"\
"Content-Length: 2555\n"\
"Vary: Accept-Encoding\n"\
"Content-Type: text/html\n";
printf("string is %s\n", httpString);
char *line;
line = strtok(httpString, "\n");
while(line != NULL) {
  if (strstr(line, "Content-Length:")!= NULL) {
    int theNumber;
    sscanf(line, "Content-Length: %d", &theNumber);
    printf("The number is %d\n", theNumber);
  }
  line = strtok(NULL, "\n");
}
return 0;
}

输出:

string is HTTP/1.1 200 OK
Date: Tue, 08 Apr 2014 17:37:10 GMT
Server: Apache/2.2.22 (Ubuntu)
Last-Modified: Tue, 18 Feb 2014 19:41:06 GMT
ETag: "1724117-9fb-4f2b373fef880"
Accept-Ranges: bytes
Content-Length: 2555
Vary: Accept-Encoding
Content-Type: text/html

The number is 2555

或者 - 正如@enhzflep在评论中所提到的,如果你在原文中查找字符串Content-length:的索引,那么你可以使用在此之后开始的字符串做sscanf点:

char searchString[] = "Content-length:";
int offset, number;
offset = strstr(httpString, searchString);
sscanf(offset + strlen(searchString), "%d", &number);