我刚刚实现了一个HTTP / 1.1客户端来解析分块传输编码。但是,它适用于某些网站但不适用于其他网站。我假设我需要为每个块数据读取chunkSize + 2
个字节,包括\r\n
,我是对的吗?
这是我的代码:
while(chunked)//if detecting chunked in the header before, this is true
{
//getLine is a function can read a line separated by \r\n
//sockfd is a socket created before and file position is at the start of HTTP body (after that blank line between header and body)
line = getLine(sockfd);
printf("%s", line);//print the chunk size line in hex
int chunkSize = strtol(line, NULL, 16);
if(chunkSize == 0)
{
printf("##### Read chunk size of 0, reading until we hit end of stream.\n");
break;
}
printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize);
char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0
bzero(chunkBuf, chunkSize + 3);
if(read(sockfd, chunkBuf, chunkSize + 2) == 0)//sockfd is a socket created before
{
perror("Read Error: ");
exit(EXIT_FAILURE);
}
printf("%s", chunkBuf);//print the chunk content
free(chunkBuf);
}
实际上我可以打印出整个内容而无需解析,即逐行打印,所以我想我可能会在上面的代码中犯一些错误,有人能给我一些提示吗?
以下是供您参考的完整代码:
#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <netdb.h>
#include <netinet/in.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#define HTTP_VERSION "HTTP/1.1"
#define PAGE "/"
int createSokect()
{
int socketfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if(socketfd < 0)
{
perror("Cannot create socket\n");
exit(EXIT_FAILURE);
}
return socketfd;
}
char* getIP(char* host)
{
struct hostent* hent;
int len = 15;//xxx.xxx.xxx.xxx
char *ipaddr = (char *)malloc(len + 1);//one more \0
bzero(ipaddr, len + 1);
if((hent = gethostbyname(host)) == NULL)
{
printf("Cannot get IP for this host: %s\n", host);
exit(EXIT_FAILURE);
}
if(inet_ntop(AF_INET, (void*)hent->h_addr_list[0], ipaddr, len) == NULL)
{
printf("Cannot resolve IP for this host: %s\n", host);
exit(EXIT_FAILURE);
}
return ipaddr;
}
char* createQuery(char* host, char* page)
{
char* msg = "GET %s %s\r\nHost: %s\r\nConnection: close\r\n\r\n";
char* query = (char *)malloc(strlen(host) + strlen(page) + strlen(msg) + strlen(HTTP_VERSION) - 6 + 1);//-6: %s %s %s
sprintf(query, msg, page, HTTP_VERSION, host);
return query;
}
char* getLine(int fd)
{
char c = 0, pre = 0;
char* line = 0;
int size = 1;
int pos = 0;
while(read(fd, &c, 1)!=0)
{
if(pos + 1 == size)
{
size *= 2;
line = realloc(line, size);
}
line[pos++] = c;
//printf("%c", c);
if(pre == '\r' && c == '\n')//this is a new line
{
break;
}
pre = c;
}
if(line)
{
line[pos++] = 0;
}
return line;
}
int main(int argc, char** argv)
{
if(argc < 3)
{
perror("Need more arguments");
exit(EXIT_FAILURE);
}
int sockfd = createSokect();
char* ip = getIP(argv[1]);
printf("Host: %s\n", argv[1]);
printf("IP: %s\n", ip);
struct sockaddr_in server;
server.sin_family = AF_INET;
int err = inet_pton(server.sin_family, ip, (void *)(&(server.sin_addr.s_addr)));
if(err != 1)
{
perror("Cannot convert IP to binary address\n");
exit(EXIT_FAILURE);
}
server.sin_port = htons(atoi(argv[2]));
printf("port: %d\n", server.sin_port);
//connect to the server
if(connect(sockfd, (struct sockaddr *)&server, sizeof(server)) < 0)
{
printf("Cannot connect: %d\n", err);
exit(EXIT_FAILURE);
}
char* query = createQuery(argv[1], PAGE);
printf("##### CLIENT IS SENDING THE FOLLOWING TO SERVER:\n");
printf("%s", query);
int offset = 0;
//send query to the server
err = send(sockfd, query + offset, strlen(query) - offset, 0);
if(err < 0)
{
perror("Cannot send query");
exit(EXIT_FAILURE);
}
printf("##### CLIENT RECEIVED THE FOLLOWING FROM SERVER:\n");
//receive message line by line
bool chunked = false;
char* line;
while((line = getLine(sockfd)) != NULL)
{
printf("%s", line);
if(!strcasecmp(line, "transfer-encoding: chunked\r\n"))
{
chunked = true;
//printf("Chunked here\n");
}
if(!strcmp(line, "\r\n"))
{
printf("##### Just read blank line, now reading body.\n");
if(chunked)//chunked, we print those in another way, otherwise line by line
{
free(line);
break;
}
}
free(line);
}
while(chunked)
{
line = getLine(sockfd);
printf("%s", line);
int chunkSize = strtol(line, NULL, 16);
if(chunkSize == 0)
{
printf("##### Read chunk size of 0, reading until we hit end of stream.\n");
break;
}
printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize);
char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0
bzero(chunkBuf, chunkSize + 3);
if(read(sockfd, chunkBuf, chunkSize + 2) == 0)
{
perror("Read Error: ");
exit(EXIT_FAILURE);
}
printf("%s", chunkBuf);
free(chunkBuf);
}
//receive message from the server
/*
char buf[2048];
bzero(buf, sizeof(buf));
err = recv(sockfd, buf, sizeof(buf), 0);
if(err < 0)
{
perror("Receive error");
exit(EXIT_FAILURE);
}
char *content = buf;
fprintf(stdout, content);*/
free(query);
free(ip);
close(sockfd);
printf("##### Connection closed by server.\n");
exit(EXIT_SUCCESS);
}
答案 0 :(得分:5)
该行:
if(read(sockfd, chunkBuf, chunkSize + 2) == 0) ...
将读取chunkSize + 2,即它可以读取更少。请参阅read
的手册页。您的代码应如下所示:
int n = 0;
while (n<chunkSize) {
r = read(sockfd, chunkBuf+n, chunkSize - n);
if (r <= 0) { error or closed conection ... }
n += r;
}
答案 1 :(得分:0)
因为我知道块大小,所以我一个接一个地读取字符,直到块大小。这种方式可以工作。但我仍然不明白为什么我在尝试一次使用整个块大小的read或recv时失败了。