Question

我是 C 语言和 libcurl 的初学者，正在编写一个程序，从某个网站获取 1000 个数据值。该网站会为每次请求分配一个作业号（job number），然后重定向到另一个页面来给出结果。由于我写的代码将近 500 行，这里只给出程序的大致流程，以及我认为可能有问题的那一小段代码：

  /*
   * Excerpt from the question. Each of the 1000 iterations:
   *   1. fetches `url` with redirects followed,
   *   2. reads the effective (post-redirect) URL and copies part of it
   *      into `job`,
   *   3. fetches the effective URL again on the same handle, collecting
   *      the body into `s` through writefunc().
   * The excerpt is elided (the "------" line) and the closing brace of the
   * for loop is not shown.
   */
  for(row=0;row<1000;row++)
  {
   ------ 
    /* NOTE(review): in this excerpt url points at a string literal, yet it
       is passed to free() below; presumably the full program allocates
       it -- confirm. */
    url = "http://example.com";

    /* NOTE(review): called on every iteration; no curl_global_cleanup()
       appears in this excerpt. */
    curl_global_init(CURL_GLOBAL_ALL);
curlHandle = curl_easy_init(); 
if(curlHandle)
{
    curl_easy_setopt(curlHandle, CURLOPT_TIMEOUT, 1800);
    /* curlErrStr is registered here, but the failure message below is built
       from curl_easy_strerror() instead. */
    curl_easy_setopt(curlHandle, CURLOPT_ERRORBUFFER, curlErrStr);
    curl_easy_setopt(curlHandle, CURLOPT_FOLLOWLOCATION, 1);
    curl_easy_setopt(curlHandle, CURLOPT_URL, url);
    curl_easy_setopt(curlHandle, CURLOPT_LOW_SPEED_LIMIT, dl_lowspeed_bytes);    
    curl_easy_setopt(curlHandle, CURLOPT_LOW_SPEED_TIME, dl_lowspeed_time);
    curl_easy_setopt(curlHandle, CURLOPT_VERBOSE, 1L);
    /* url is released before the transfer runs. */
    free(url);
    curlErr = curl_easy_perform(curlHandle);    
    if(curlErr != CURLE_OK)
    {
        fprintf(stderr, "curl_easy_perform() failed: %s\n",curl_easy_strerror(curlErr));
    }
    else 
    {
        /* Ask libcurl where the redirects ended up. */
        curlErr = curl_easy_getinfo(curlHandle, CURLINFO_EFFECTIVE_URL, &url_new);
        if((CURLE_OK == curlErr) && url_new)
        {
            /* Copy at most 18 characters starting at offset 28 of the
               effective URL, then strip the newline that was just appended.
               NOTE(review): the length of url_new is not checked before
               adding 28 to it. */
            sprintf(job,"%.*s\n", 18, url_new + 28);
            if((ptr1 = strchr(job, '\n')) != NULL)
                *ptr1 = '\0';
            init_string(&s);
            /* Second request on the same handle; the body is appended to s
               by writefunc(). */
            curl_easy_setopt(curlHandle, CURLOPT_TIMEOUT, 1800 );
            curl_easy_setopt(curlHandle, CURLOPT_URL, url_new);
            curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, writefunc);
            curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, &s);
            /* NOTE(review): curlErr1 is not inspected before s.ptr is
               printed, so a failed second transfer prints whatever was
               collected so far (possibly an empty string). */
            curlErr1 = curl_easy_perform(curlHandle);
            /* NOTE(review): strlen() returns size_t but is printed with %lu. */
            printf("###### %lu\t%s\n",strlen(s.ptr),s.ptr);
            free(s.ptr);
        }
        /* NOTE(review): this cleanup sits inside the else branch, so it is
           not reached when the first curl_easy_perform() fails. */
        curl_easy_cleanup(curlHandle);
    }
}

用到的结构体和函数如下：

  /* Heap-backed text buffer that init_string() sets up and writefunc()
     appends to. */
  struct string
  {
   /* Heap buffer; init_string() and writefunc() keep it NUL-terminated. */
   char *ptr;
   /* Number of bytes stored in ptr, not counting the terminating NUL. */
   size_t len;
  };

  /*
   * Set *a up as an empty string: len becomes 0 and ptr becomes a freshly
   * allocated one-byte buffer holding only the terminating NUL.
   *
   * If the allocation fails, "malloc() failed" is written to stderr and the
   * process exits with EXIT_FAILURE.
   */
  void init_string(struct string *a)
  {
      char *buf;

      a->len = 0;

      /* One byte: room for the terminator of the empty string. */
      buf = malloc(1);
      if (!buf) {
          fprintf(stderr, "malloc() failed\n");
          exit(EXIT_FAILURE);
      }

      buf[0] = '\0';
      a->ptr = buf;
  }

  /*
   * Append size*nmemb bytes from ptr to the end of *a, growing the buffer
   * as needed and keeping it NUL-terminated.
   *
   * Returns size*nmemb. If the reallocation fails, "realloc() failed" is
   * written to stderr and the process exits with EXIT_FAILURE.
   */
  size_t writefunc(void *ptr, size_t size, size_t nmemb, struct string *a)
  {
      const size_t chunk = size * nmemb;
      const size_t total = a->len + chunk;
      char *grown = realloc(a->ptr, total + 1);   /* +1 for the terminator */

      if (!grown) {
          fprintf(stderr, "realloc() failed\n");
          exit(EXIT_FAILURE);
      }

      /* The new bytes go right after the ones already stored. */
      memcpy(grown + a->len, ptr, chunk);
      grown[total] = '\0';

      a->ptr = grown;
      a->len = total;
      return chunk;
  }

该程序本身没有报任何错误。但在 1000 个数据中，约有 50% 因 curl_easy_perform() 失败（达到超时，Timeout was reached）而无法获取；约 20% 的情况下，打印 strlen(s.ptr) 和 s.ptr 的那一行输出为 0（即 s.ptr 为空）；其余的都是正确的。

对于输出为 0 的情况，verbose（详细输出）选项给出了以下信息：

Connection #0 to host www.example.com left intact
getaddrinfo(3) failed for :80
Couldn't resolve host ''
Closing connection #1
Couldn't resolve host name 0

请指出程序中可能存在的错误。

Answer 1

Here is how I would fetch data using cURL


static CURL *curl = NULL;

CURL *initCURL(void)
{
    curl_global_init(CURL_GLOBAL_DEFAULT);

    curl = curl_easy_init();
    if(curl)  
    { 
        // now set all the desired options
        curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");
        /* example.com is redirected, so we tell libcurl to follow redirection */ 
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

        // etc
    }
    else
    { // else cURL object creation failed
      // display appropriate error message
    } 
}

void endCurl(void)
{
    // and then when all done with the cURL object,
    // cleanup
    curl_easy_cleanup(curl);
}


/*
 * Run one transfer on the given easy handle; call once per fetch.
 *
 * Returns the CURLcode from curl_easy_perform(). When it is not CURLE_OK,
 * the matching curl_easy_strerror() text is written to stderr first.
 */
CURLcode execCurl( CURL *curl )
{
    const CURLcode status = curl_easy_perform(curl);

    // Success: nothing to report.
    if (status == CURLE_OK)
    {
        return status;
    }

    fprintf(stderr, "curl_easy_perform() failed: %s\n",
            curl_easy_strerror(status));
    return status;
}


Note:
I have had this same problem with the cURL timeout occurring.
The best recovery method I found is:
when a timeout occurs, retry the communication, requesting the same data

在 C 语言的 for 循环中多次使用 curl

1 个答案: