使用libcurl检索的标头信息来确定文件名

时间:2015-05-05 18:18:32

标签: c libcurl

我正在尝试使用libcurl解析标头信息,以便保留所下载文件的原始文件名。目前我在标头的每一行中查找子字符串"Content-Disposition: ",以便找到文件名。我编写了以下代码,其思路是通过编辑查找表(如下所述)来扩展我要在标头数据中查找的子字符串的数量。我遇到的问题是,无论我用哪个网址测试我的代码,我都从未在标头信息中找到我要查找的子字符串(目前只有"Content-Disposition: ")。为什么我在标头信息中看不到它?

以下是相关代码:

/**
 * file_info_container - bundle of file information shared between the
 *                       different CURL operations
 *
 * All three members are plain character pointers; the structure itself
 * does not record who owns the strings they point to.
 */
typedef struct {
    char *file_name;    /* name of the file */
    char *file_url;     /* URL of the file */
    char *file_type;    /* type of the file */
} file_info_container;

/**
 * filename_tags - a lookup table of sub-strings to be compared with strings
 *                   that may contain the original (remote) file name. The
 *                   indexes of this table will always correspond with the
 *                   table filename_denotations.
 *
 * Each entry is an ordinary C string; the compiler already appends the
 * terminating NUL, so no explicit "\0" is written inside the literal.
 * To look for another header, append its tag here and add the matching
 * pattern at the same index of filename_denotations.
 */
static const char *filename_tags[] = {
    "Content-Disposition: ",
};

/**
 * filename_denotations - a lookup table of the patterns that immediately
 *                   precede a file name inside a string already known to
 *                   carry one. Entry i of this table belongs to entry i
 *                   of the table filename_tags.
 */
static const char *filename_denotations[] = { "filename=\"" };

...

/**
 * header_cb - callback function used by curl to parse header information
 * @headerln: contents of current header line being parsed
 * headerln_data_size: data size of headerln[n]
 * headerln_n: number of elements in headerln[]
 * @parsed_data: structure where file information is stored
 *
 * Populates  parsed_data->file_name with either the filename provided by
 * the file header (if it is available) or NULL if it is not.
 */
static size_t header_cb(char *headerln, size_t headerln_data_size,
                        size_t headerln_n, void *parsed_data)
{
    char *bff = NULL, *filename = NULL;
    file_info_container *file_info = (file_info_container*) parsed_data;
    size_t i, ret = headerln_data_size * headerln_n;

    for(i = 0; i < sizeof(filename_tags) / sizeof(filename_tags[0]); i++) {
        filename = strstr(headerln, filename_tags[i]);
        if(filename) {
            bff = strstr(filename, filename_denotations[i]) +     
                         strlen(filename_denotations[i]);
            if(bff) {
                if(strchr(bff, (int)'.'))
                {
                    filename = malloc((strlen(bff) * sizeof(char)) + 1);
                    if(!filename) {
                        //malloc error
                    }
                    while(*bff != '.')
                        *filename++ = *bff++;
                    bff = realloc(filename, strlen((filename) + 1) * 
                                  sizeof(char));
                    if(!bff) {
                        //realloc error
                    }
                    filename = bff;
                    *filename = '\0';
                }
            } else
                filename = NULL;
        }
    }
    if(filename) {
        file_info->file_name = malloc((strlen(filename) + 1) * 
                                      sizeof(char));
        if(!file_info->file_name) {
            //malloc error
        }
        strcpy(file_info->file_name, filename);
    }
    return ret;
}

...

/*
 * Will comment later when this function is more finalized
 */
void download_url(char* url)
{
    char *bff = NULL, *temp_file;
    CURL *curl_handle;
    CURLcode res;
    file_info_container file_info;
    FILE *fp;

    bff = tmpnam(NULL);
    if(!bff) {
        //error handling for tmpnam failure
    }
    bff++;
    temp_file = malloc((strlen(bff) + 4) * sizeof(char));
    if(!temp_file) {
        //error handling for malloc failure
    }
    strcpy(temp_file, bff);
    free(bff);
    strcat(temp_file, "lok\0");
    fp = fopen(temp_file, "wb");
    curl_handle = curl_easy_init();
    curl_easy_setopt(curl_handle, CURLOPT_URL, url);
    curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, header_cb);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_cb);
    curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, fp);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, fp);
    res = curl_easy_perform(curl_handle);
    fclose(fp);
    curl_easy_getinfo(curl_handle, CURLINFO_EFFECTIVE_URL,
                      &file_info.file_url);
    curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_TYPE,
                      &file_info.file_type);
    if(!file_info.file_type) {
        file_info.file_type = malloc(8 * sizeof(char));
        if(!file_info.file_type) {
            //error handling for memory allocation failure
        }
        strcpy(file_info.file_type, "bin/unk\0");
    }
    //build_filename(&file_info);
    //rename the lok file with whatever we get later
    free(temp_file);
    curl_easy_cleanup(curl_handle);
    return 0;
}

main函数只是调用download_url(url)。

TL;DR:我的问题是,为什么无论我传给download_url的URL是什么,我都从未在标头中看到包含子字符串"Content-Disposition: "的行?

1 个答案:

答案 0 :(得分:1)

您请求的是一个特定文件,服务器返回的也正是您所请求的文件——您已经知道所请求文件的名称了!

您请求:http://foobar.com/Example.pdf
服务器现在向您发送Example.pdf的内容

你怎么能说你不知道文件的名称?

如果你依赖Content-Disposition标题,服务器不需要发送它,事实上,很少发送。

Content-Disposition documentation

  

如果用户请求将内容保存到文件,则建议将Content-Disposition response-header字段用作原始服务器建议默认文件名的方法。