问题1:使用libcurl下载url时,如何保留下载文件的原始名称? LibCurl要求程序员自己生成文件名。当URL本身包含文件名时,这可能很容易,例如在下面的url中,很容易找出目标名称 vimqrc.pdf 。
http://tnerual.eriogerg.free.fr/vimqrc.pdf
但是当目标名称由URL动态生成时就不那么容易了,例如,使用wget(除URL之外没有参数)或curl(参数-O)访问下面的URL时,下载得到的文件是 AdbeRdr1010_eu_ES.exe。
http://get.adobe.com/reader/download/?installer=Reader_10.1_Basque_for_Windows&standalone=1%22
curl(-O)或wget是如何计算出文件名称的?
//invoked as ./a.out <URL>
#include <stdio.h>
#include <curl/curl.h>
/* Fixed path of the output file that main() opens for writing. */
char *location = "/tmp/test/out";
/*
 * Write callback: hands the received chunk straight to fwrite() on the
 * supplied stream and reports fwrite()'s result (the number of items of
 * `size` bytes that were stored) back to the caller.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}
int main(int argc, char *argv[])
{
CURL *curl;
CURLcode res;
int ret = -1;
if (argc!= 2) {
//invoked as ./a.out <URL>
return -1;
}
curl = curl_easy_init();
if (!curl) {
goto bail;
}
FILE *fp = fopen(location, "wb");
curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //invoked as ./a.out <URL>
/* example.com is redirected, so we tell libcurl to follow redirection */
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
/* Perform the request, res will get the return code */
res = curl_easy_perform(curl);
/* Check for errors */
if(res != CURLE_OK)
fprintf(stderr, "curl_easy_perform() failed: %s\n",
curl_easy_strerror(res));
/* always cleanup */
curl_easy_cleanup(curl);
ret = 0;
fclose(fp);
bail:
return ret;
}
答案 0 :(得分:8)
我在libcurl源代码中找到了答案。看起来"远程名称"是响应标头中"Content-Disposition"标记的一部分。Libcurl会解析标头,并在Content-Disposition标记中查找"filename="。此解析在通过CURLOPT_HEADERFUNCTION选项提供的回调中完成。最后,在写入数据的回调中(通过CURLOPT_WRITEFUNCTION提供),这个远程名称被用来创建输出文件。
如果标头中没有文件名,它就直接从URL本身推导出文件名。下面的代码几乎是从libcurl复制的,我只做了少量修改,使其更简单并符合我的要求。
#define _GNU_SOURCE
#include <stdio.h>
#include <curl/curl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdlib.h>
/* NOTE(review): local stand-in for the <stdint.h> name; `unsigned long` is
 * not 64 bits wide on every platform — confirm this is intended. */
typedef unsigned long uint64_t;
/* Per-download state handed to both the header callback and the write
 * callback through their userdata pointer. */
typedef struct {
/* File name to save under: taken from the Content-Disposition header or,
 * when that left it empty, from the URL. */
char dnld_remote_fname[4096];
/* Copy of the URL being downloaded. */
char dnld_url[4096];
/* Output file; opened by write_cb the first time it is called. */
FILE *dnld_stream;
/* Not referenced by any code visible in this snippet. */
FILE *dbg_stream;
/* Running total that write_cb accumulates after each complete write. */
uint64_t dnld_file_sz;
} dnld_params_t;
/*
 * Extracts the value of the "filename=" parameter from the body of a
 * Content-Disposition header and stores it, NUL-terminated, in `oname`.
 *
 * cd    - NUL-terminated header text that follows "Content-disposition:".
 *         Example: filename=name1367; charset=funny; option=strange
 * oname - destination buffer. At most 4095 characters plus the terminator
 *         are written, so it must be at least 4096 bytes large.
 *
 * The key is matched case-insensitively. Surrounding double quotes, the
 * CR/LF that ends a raw header line and trailing blanks are not part of
 * the stored name. Only the last path component of the value is kept, so a
 * server cannot steer the output outside the download directory.
 *
 * Returns 0 when a non-empty name was stored. Returns -1 (leaving `oname`
 * untouched) when an argument is NULL, the key is absent or the value is
 * empty.
 */
static int get_oname_from_cd(char const*const cd, char *oname)
{
    enum { ONAME_CAP = 4096 };
    char const*const cdtag = "Content-disposition:";
    char const*const key = "filename=";
    const size_t key_len = strlen(key);
    const char *val = NULL;
    const char *end;
    const char *p;
    size_t len;

    if (!cd || !oname) {
        return -1;
    }

    /* Case-insensitive search for the key without the GNU-only strcasestr(). */
    for (p = cd; *p != '\0'; p++) {
        if (strncasecmp(p, key, key_len) == 0) {
            val = p + key_len;
            break;
        }
    }
    if (!val) {
        printf("No key-value for \"%s\" in \"%s\"", key, cdtag);
        return -1;
    }

    if (*val == '"') {
        /* Quoted value: runs up to the closing quote. */
        val++;
        end = val;
        while (*end != '\0' && *end != '"' && *end != '\r' && *end != '\n') {
            end++;
        }
    } else {
        /* Bare token: runs up to the next parameter or the end of the line. */
        end = val;
        while (*end != '\0' && *end != ';' && *end != '\r' && *end != '\n') {
            end++;
        }
        while (end > val && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
    }

    /* Drop any directory part the server may have supplied. */
    for (p = val; p < end; p++) {
        if (*p == '/' || *p == '\\') {
            val = p + 1;
        }
    }

    len = (size_t)(end - val);
    if (len == 0) {
        return -1;
    }
    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;
    }
    memcpy(oname, val, len);
    oname[len] = '\0';
    return 0;
}
/*
 * Derives an output file name from a URL: everything after the last '/'
 * that follows the optional "scheme://" prefix. A query string, if present,
 * is kept as part of the name.
 *
 * url   - NUL-terminated URL.
 * oname - destination buffer. At most 4095 characters plus the terminator
 *         are written, so it must be at least 4096 bytes large.
 *
 * Returns 0 when a non-empty name was stored. Returns -1 when an argument
 * is NULL, or when the URL has no '/' after the scheme or ends in '/'; in
 * those URL cases `oname` is set to the empty string.
 */
static int get_oname_from_url(char const* url, char *oname)
{
    enum { ONAME_CAP = 4096 };
    const char *rest;
    const char *name;
    size_t len;

    if (!url || !oname) {
        return -1;
    }
    oname[0] = '\0';

    /* Remove "http(s)://"; a URL without a scheme is scanned from its start. */
    rest = strstr(url, "://");
    rest = rest ? rest + strlen("://") : url;

    /* No slash means there is no path, hence no file name to extract. */
    name = strrchr(rest, '/');
    if (!name) {
        return -1;
    }
    /* Remove last '/' */
    name++;

    len = strlen(name);
    if (len == 0) {
        return -1;
    }
    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;
    }
    memcpy(oname, name, len);
    oname[len] = '\0';
    return 0;
}
/*
 * libcurl header callback: called once per received header line. When the
 * line is a Content-Disposition header, its file name is stored in the
 * dnld_params_t passed as `userdata`.
 *
 * hdr      - header bytes; libcurl does not NUL-terminate this buffer.
 * size     - element size.
 * nmemb    - element count; the line is size * nmemb bytes long.
 * userdata - pointer to the dnld_params_t of this download.
 *
 * Always returns size * nmemb so that the transfer continues even when the
 * header cannot be parsed.
 *
 * Example:
 * ...
 * Content-Type: text/html
 * Content-Disposition: filename=name1367; charset=funny; option=strange
 */
size_t dnld_header_parse(void *hdr, size_t size, size_t nmemb, void *userdata)
{
    const size_t cb = size * nmemb;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    char const*const cdtag = "Content-disposition:";
    const size_t cdtag_len = strlen(cdtag);
    /* Same size as the name buffer, so the text after the tag always fits in it. */
    char line[sizeof dnld_params->dnld_remote_fname];
    size_t line_len;

    if (!hdr || !dnld_params || cb < cdtag_len) {
        return cb;
    }
    /* Bounded compare: never reads past the cb bytes libcurl handed over. */
    if (strncasecmp((const char *)hdr, cdtag, cdtag_len) != 0) {
        return cb;
    }

    /* Make a NUL-terminated copy before using string functions or %s on it. */
    line_len = cb < sizeof(line) - 1 ? cb : sizeof(line) - 1;
    memcpy(line, hdr, line_len);
    line[line_len] = '\0';

    printf ("Found c-d: %s\n", line);
    if (get_oname_from_cd(line + cdtag_len, dnld_params->dnld_remote_fname)) {
        printf("ERR: bad remote name");
    }
    return cb;
}
/*
 * Creates (or truncates) the file "/tmp/<fname>" and opens it for binary
 * writing.
 *
 * fname - NUL-terminated file name; must be non-NULL and non-empty.
 *
 * Returns the open stream, or NULL (after printing a message) when the name
 * is missing, the resulting path does not fit the internal buffer, or
 * fopen() fails. The caller owns the stream and must fclose() it.
 */
FILE* get_dnld_stream(char const*const fname)
{
    char const*const pre = "/tmp/";
    char out[4096];
    int n;
    FILE *fp;

    if (!fname || fname[0] == '\0') {
        printf ("Could not create file: empty file name\n");
        return NULL;
    }

    /* `pre` already ends in '/', so no extra separator is inserted. */
    n = snprintf(out, sizeof(out), "%s%s", pre, fname);
    if (n < 0 || (size_t)n >= sizeof(out)) {
        /* A truncated path would silently name a different file. */
        printf ("Could not create file: path too long\n");
        return NULL;
    }

    fp = fopen(out, "wb");
    if (!fp) {
        printf ("Could not create file %s\n", out);
    }
    return fp;
}
/*
 * libcurl write callback: appends one chunk of the response body to the
 * download's output file, opening that file on the first call.
 *
 * buffer   - received bytes.
 * sz       - element size.
 * nmemb    - element count; the chunk is sz * nmemb bytes long.
 * userdata - pointer to the dnld_params_t of this download.
 *
 * If no file name was taken from the response headers, one is derived from
 * the URL before the file is opened.
 *
 * Returns the number of bytes written. A value smaller than sz * nmemb
 * (including 0 when the state is missing or the output file cannot be
 * created) makes libcurl abort the transfer with a write error.
 */
size_t write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
{
    const size_t total = sz * nmemb;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    size_t written;

    if (!dnld_params) {
        return 0;
    }

    if (!dnld_params->dnld_remote_fname[0]) {
        /* An unusable URL leaves the name empty; the open below then fails. */
        (void)get_oname_from_url(dnld_params->dnld_url, dnld_params->dnld_remote_fname);
    }

    if (!dnld_params->dnld_stream) {
        dnld_params->dnld_stream = get_dnld_stream(dnld_params->dnld_remote_fname);
        if (!dnld_params->dnld_stream) {
            /* Never hand a NULL stream to fwrite(). */
            return 0;
        }
    }

    /* Element size 1 makes fwrite() report bytes, which is what libcurl expects back. */
    written = fwrite(buffer, 1, total, dnld_params->dnld_stream);
    dnld_params->dnld_file_sz += written;
    return written;
}
/*
 * Downloads `url` with libcurl. The output file is created by write_cb
 * under the name found by dnld_header_parse or, failing that, derived from
 * the URL.
 *
 * url - NUL-terminated URL; must be shorter than the dnld_url buffer.
 *
 * On success prints the chosen file name and the accumulated size.
 *
 * Returns 0 when the transfer succeeded and the output file (if one was
 * opened) was closed cleanly; -1 on a NULL or over-long URL, on any libcurl
 * set-up or transfer error, or when closing the file fails. The curl handle
 * and the output stream are released on every path.
 */
int download_url(char const*const url)
{
    CURL *curl = NULL;
    int ret = -1;
    CURLcode cerr = CURLE_OK;
    dnld_params_t dnld_params;
    size_t url_len;

    memset(&dnld_params, 0, sizeof(dnld_params));

    if (!url) {
        printf ("Bad URL\n");
        goto bail;
    }
    url_len = strlen(url);
    if (url_len >= sizeof(dnld_params.dnld_url)) {
        /* Copying it would overrun dnld_url. */
        printf ("URL too long\n");
        goto bail;
    }
    memcpy(dnld_params.dnld_url, url, url_len + 1);

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    cerr = curl_easy_setopt(curl, CURLOPT_URL, url);
    if (cerr) { printf ("%s: failed with err %d\n", "URL", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, dnld_header_parse);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER DATA", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
    if (cerr) { printf ("%s: failed with err %d\n", "WR CB", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "WR Data", cerr); goto bail;}

    cerr = curl_easy_perform(curl);
    if (cerr != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(cerr));
        goto bail;
    }

    printf ("Remote name: %s\n", dnld_params.dnld_remote_fname);
    /* The cast keeps %lu correct whatever the underlying type of uint64_t is. */
    printf ("file size : %lu\n", (unsigned long)dnld_params.dnld_file_sz);
    ret = 0;

bail:
    /* The stream stays NULL when the write callback never opened a file. */
    if (dnld_params.dnld_stream && fclose(dnld_params.dnld_stream) != 0) {
        ret = -1;
    }
    /* always cleanup */
    if (curl) {
        curl_easy_cleanup(curl);
    }
    return ret;
}
/*
 * Entry point: expects exactly one argument, the URL to fetch, and returns
 * whatever download_url() reports for it. Any other argument count prints
 * a short message and yields -1.
 */
int main(int argc, char *argv[])
{
    if (argc == 2) {
        return download_url(argv[1]);
    }

    printf ("Bad args\n");
    return -1;
}
答案 1 :(得分:-2)
确定文件名的是你的程序,而不是libcurl。在您的示例中,您可以简单地将char *location = "/tmp/test/out";
更改为char *location = "/tmp/test/vimqrc.pdf";
以获得所需的效果。
如果您想在给定url和父目录的情况下,以编程方式得到下载文件的路径,可以执行以下操作:
/*
 * Builds the local path for a download: `parent_directory` followed by the
 * part of `url` after its last '/'. A url without any '/' is used whole as
 * the file name.
 *
 * location         - destination buffer for the NUL-terminated path.
 * location_length  - size of `location` in bytes, terminator included.
 * url              - NUL-terminated URL, e.g.
 *                    "http://tnerual.eriogerg.free.fr/vimqrc.pdf".
 * parent_directory - NUL-terminated directory prefix, e.g. "/tmp/test/";
 *                    it is copied verbatim, so it should end in '/'.
 *
 * Returns the length of the stored path, or -1 when an argument is NULL or
 * the path (plus terminator) does not fit in `location`; nothing is written
 * in that case.
 */
int url_to_location(char* location, unsigned int location_length, const char* url, const char* parent_directory)
{
    const char *last_slash;
    const char *file_name;
    size_t capacity = location_length;
    size_t parent_directory_length;
    size_t file_name_length;

    if (!location || !url || !parent_directory) {
        return -1;
    }

    /* use the characters of the url following the last slash as filename;
       no slashes found means the relative url itself is the filename */
    last_slash = strrchr(url, '/');
    file_name = last_slash ? last_slash + 1 : url;

    parent_directory_length = strlen(parent_directory);
    file_name_length = strlen(file_name);

    /* Reject when directory + name + terminator would exceed the buffer.
       Checked in two steps so the subtraction cannot wrap around. */
    if (parent_directory_length >= capacity)
        return -1;
    if (file_name_length >= capacity - parent_directory_length)
        return -1;

    memcpy(location, parent_directory, parent_directory_length);
    memcpy(location + parent_directory_length, file_name, file_name_length + 1);

    return (int)(parent_directory_length + file_name_length);
}