从http套接字中提取位置

时间:2013-09-06 02:48:36

标签: c string scanf strstr

我将以下http回复保存在名为source.txt的本地文件中:

HTTP/1.1 301 Moved
Connection: close
Content-length: 111
Location: https://11.12.13.14:81/
Content-type: text/html; charset="utf-8"

<html><head><META HTTP-EQUIV="refresh" CONTENT="0;URL=https://11.12.13.14:81/"></head><body></body></html>

和以下代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXBUFLEN 1024

/*
 * Extract the value of the "Location:" header from an HTTP response.
 *
 * source: NUL-terminated response text.
 *
 * Returns a newly malloc'd, NUL-terminated copy of the header value, with
 * blanks after the colon skipped and the line terminator (LF or CRLF)
 * excluded. Returns NULL if source is NULL, the header is not present, or
 * the allocation fails. The caller owns the result and must free() it.
 */
char* getLocation(char* source)
{
    static const char label[] = "Location:";
    const char *p1;
    size_t len;
    char *res;

    if (source == NULL)
        return NULL;

    p1 = strstr(source, label);
    if (p1 == NULL)
        return NULL;            /* header absent: nothing to extract */
    p1 += sizeof label - 1;

    /* Skip optional blanks instead of assuming exactly one space. */
    while (*p1 == ' ' || *p1 == '\t')
        p1++;

    /* The value runs to CR, LF, or the end of the buffer, whichever is first. */
    len = strcspn(p1, "\r\n");

    res = malloc(len + 1);
    if (res == NULL)
        return NULL;
    memcpy(res, p1, len);
    res[len] = '\0';
    return res;
}

/*
 * Extract the part of a URL that follows the "://" scheme separator.
 *
 * source: NUL-terminated text containing a URL.
 *
 * Returns a newly malloc'd, NUL-terminated copy of everything after the
 * first "://" up to the end of that line, or up to the end of the string
 * when there is no line terminator. Returns NULL if source is NULL, no
 * "://" is present, or the allocation fails. The caller must free() the
 * result.
 */
char* getData(char* source)
{
    const char *p1;
    size_t len;
    char *res;

    if (source == NULL)
        return NULL;

    p1 = strstr(source, "://");
    if (p1 == NULL)
        return NULL;
    p1 += 3;

    /*
     * The input is often a single line with no trailing newline, so stop at
     * CR/LF when present and at the terminating NUL otherwise.
     */
    len = strcspn(p1, "\r\n");

    res = malloc(len + 1);
    if (res == NULL)
        return NULL;
    memcpy(res, p1, len);
    res[len] = '\0';
    return res;
}

/*
 * Read an HTTP response from source.txt, pull out the Location header and
 * print the host and port found in its URL.
 *
 * Returns 0 on success and EXIT_FAILURE if the file cannot be opened or
 * read, or if no Location header can be extracted.
 */
int main(void)
{
    char source[MAXBUFLEN];
    char host[100];
    int port;
    size_t newLen;
    char *line;
    FILE *fp = fopen("source.txt", "r");

    if (fp == NULL) {
        perror("source.txt");
        return EXIT_FAILURE;
    }

    /* Read at most MAXBUFLEN - 1 bytes so the terminator always fits. */
    newLen = fread(source, sizeof(char), MAXBUFLEN - 1, fp);
    fclose(fp);
    if (newLen == 0) {
        fputs("Error reading file", stderr);
        return EXIT_FAILURE;
    }
    source[newLen] = '\0';

    line = getLocation(source);
    if (line == NULL) {
        fputs("Location header not found\n", stderr);
        return EXIT_FAILURE;
    }
    printf("getLocation result: %s\n", line);

    if (strstr(line, "://")) {
        /* line carries no trailing newline; getData is expected to cope. */
        char *res = getData(line);
        if (res == NULL) {
            printf("no link\n");
        } else {
            printf("getData result: %s\n", res);
            if (strstr(res, ":")) {
                /* %99[^:] bounds the write into host[100]. */
                if (sscanf(res, "%99[^:]:%d", host, &port) == 2)
                    printf("host: %s | port: %d\n", host, port);
                else
                    printf("could not parse host and port\n");
            } else {
                printf("delimiter not found\n");
            }
            free(res);
        }
    } else {
        printf("no link\n");
    }

    free(line);
    return 0;
}

该程序运行良好,但代码非常难看。

有没有办法改进代码以避免执行这么多操作?

我的意思是合并这两个函数getLocation和getData ...

编辑:是我的错误,getData必须从res(即getLocation的结果)中提取,而不是从source中提取。

3 个答案:

答案 0 :(得分:0)

显而易见的做法大致如下:

/*
 * Return a copy of the text that follows the first occurrence of label in
 * source, up to the end of that line (CR/LF excluded) or the end of the
 * string when there is no line terminator.
 *
 * source: NUL-terminated text to search.
 * label:  NUL-terminated marker that precedes the wanted text.
 *
 * Returns a newly malloc'd, NUL-terminated string the caller must free(),
 * or NULL if either argument is NULL or label does not occur in source.
 * Terminates the process with EXIT_FAILURE if memory cannot be allocated.
 */
char * getstuff(char * source, char * label) {
    const char *p1;
    size_t len;
    char *res;

    if (source == NULL || label == NULL)
        return NULL;

    p1 = strstr(source, label);
    if (p1 == NULL)
        return NULL;            /* label absent: avoid NULL arithmetic */
    p1 += strlen(label);

    /* Stops at CR, LF, or the terminating NUL, so a missing newline is fine. */
    len = strcspn(p1, "\r\n");

    res = malloc(len+1);
    if ( res == NULL ) {
        fputs("Couldn't allocate memory.", stderr);
        exit(EXIT_FAILURE);
    }

    memcpy(res, p1, len);
    res[len] = '\0';

    return res;
}

/* Look up the text after "Location: " in source by delegating to getstuff(). */
char* getLocation(char* source) {
    char *value = getstuff(source, "Location: ");

    return value;
}

/* Look up the text after "://" in source by delegating to getstuff(). */
char* getData(char* source) {
    char *value = getstuff(source, "://");

    return value;
}

如果每个函数只打算调用一次,也可以只保留getstuff(),完全省略getLocation()和getData()。

答案 1 :(得分:0)

假设您使用的是Linux,我有一个用awk实现的答案:

awk '///:/{print $2}' source.txt 

其行为与您的getLocation()相同。

我怀疑getData()实际上应该为您提供html content(但您的代码会返回与getLocation()相同但没有http://的字符串)。因此,这是我的awk代码,用于获取html内容。

awk '/<html>/{print $0}' source.txt

将为您提供html回复的实际内容。(当然,我假设内容中不包含\n字符,不过这很容易扩展。)

要将其集成到您的代码中,只需执行以下操作:

system("command >> op.txt");

其中command指的是我前面写的两条awk命令。然后,您可以从文件op.txt中读取输出。这样30行代码就缩减为2行(外加一些读取op.txt的代码)。希望这对您有帮助。 :):)

答案 2 :(得分:0)

#include <stdio.h>
#include <stdlib.h>
#define MAXBUFLEN 1024

/*
 * Extract the value of the "Location:" header from an HTTP response.
 *
 * source: NUL-terminated response text.
 *
 * Returns a newly malloc'd, NUL-terminated copy of the header value, with
 * blanks after the colon skipped and the line terminator (LF or CRLF)
 * excluded. Returns NULL if source is NULL, the header is not present, or
 * the allocation fails. The caller owns the result and must free() it.
 */
char* getLocation(char* source)
{
    static const char label[] = "Location:";
    const char *start;
    size_t len;
    char *res;

    if (source == NULL)
        return NULL;

    start = strstr(source, label);
    if (start == NULL)
        return NULL;            /* header absent: nothing to extract */
    start += sizeof label - 1;

    /* Skip optional blanks instead of assuming exactly one space. */
    while (*start == ' ' || *start == '\t')
        start++;

    /* The value runs to CR, LF, or the end of the buffer, whichever is first. */
    len = strcspn(start, "\r\n");

    res = malloc(len + 1);
    if (res == NULL)
        return NULL;
    memcpy(res, start, len);
    res[len] = '\0';
    return res;
}

/*
 * Read an HTTP response from source.txt, pull out the Location header and
 * print the host and port found in its URL, scanning the header value in
 * place rather than copying it a second time.
 *
 * Returns 0 on success and EXIT_FAILURE if the file cannot be opened or
 * read, or if no Location header can be extracted.
 */
int main(void)
{
    char source[MAXBUFLEN];
    char host[100];
    int port;
    size_t newLen;
    char *location;     /* owning pointer: this is what gets freed */
    const char *rest;   /* cursor into location; never freed */
    FILE *fp = fopen("source.txt", "r");

    if (fp == NULL) {
        perror("source.txt");
        return EXIT_FAILURE;
    }

    /* Read at most MAXBUFLEN - 1 bytes so the terminator always fits. */
    newLen = fread(source, sizeof(char), MAXBUFLEN - 1, fp);
    fclose(fp);
    if (newLen == 0) {
        fputs("Error reading file", stderr);
        return EXIT_FAILURE;
    }
    source[newLen] = '\0';

    location = getLocation(source);
    if (location == NULL) {
        fputs("Location header not found\n", stderr);
        return EXIT_FAILURE;
    }
    printf("getLocation result: %s\n", location);

    /* Keep location intact so it can be freed; advance a separate cursor. */
    rest = strstr(location, "://");
    if (rest != NULL) {
        rest = rest + 3;
        if (strstr(rest, ":")) {
            /* %99[^:] bounds the write into host[100]. */
            if (sscanf(rest, "%99[^:]:%d", host, &port) == 2)
                printf("host: %s | port: %d\n", host, port);
            else
                printf("could not parse host and port\n");
        } else {
            printf("delimiter not found\n");
        }
    } else {
        printf("no link\n");
    }

    free(location);
    return 0;
}