cURL:处理多个异步请求

时间:2014-09-05 12:41:36

标签: c++ curl asynchronous httprequest libcurl

我从来没有在c ++中做过任何多线程或异步的事情,到目前为止我只使用cURL来做单个同步请求。
为了更好地可视化我正在尝试做的事情,我写了一个简单的Javascript,它可以用C ++中的cURL做我想做的事。

function AddRequest( method, url, data, id ) {
    var httpObj = new ActiveXObject("Msxml2.XMLHTTP.6.0"); //new XMLHttpRequest();
    httpObj.onreadystatechange = function() {
        if (httpObj.readyState == 4)
            ResponseCallback( httpObj, id );
    };
    httpObj.Open( method, url, true );
    httpObj.Send( data );

}

function ResponseCallback( httpObj, id ) {
    WScript.Echo( id ); //alert( id );
    WScript.Echo( httpObj.ResponseText ); //alert( httpObj.ResponseText );
}

//It could now be used like this:

AddRequest("GET","http://example.com/","",1);
AddRequest("GET","https://www.facebook.com","",2);
WScript.Echo( "all requests sent" ); //alert( "all requests sent" );
//these requests are all done at the same time 
//and every time a request has finished it calls the ResponseCallback() function,
//telling it which request has finished

CURL似乎完全不同,并且比XmlHttpRequest更复杂,即使两者都只是发送http请求...
这是我的第一种方法(基于人类的答案):

#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;

bool printing = false; //will allow us to prevent prints overlapping each other

struct requestStruct { //will allow us to pass more than one argument to the threaded functions
    int id;
    const char* url;
    const char* method;
    const char* body;
    map<const char*, const char*> headers;
    const char* proxy;
    int timeout;
};

struct responseStruct { //will allow us to return more than one value from the Request function
    long statusCode;
    //map<const char*, const char*> headers;
    const char* body;
};

size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
    ((string*)stream)->append((char*)ptr, 0, size* count);
    return size* count;
}

static void *ResponseCallback(int id, struct responseStruct *response) {
    long statusCode = response -> statusCode;
    //map<const char*, const char*> headers = response -> headers;
    const char* body = response -> body;

    //while (printing) {} //wait for other threads to stop printing
    printing = true; //tell other threads to not print anything
      cout << id << " response received! Code: " << statusCode << endl << body << endl;
    printing = false; //tell other threads printing is okay again
    return NULL;
}

struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
    CURL *curl;
    curl = curl_easy_init();

    long statusCode = 0;
    map<const char*, const char*> respHeaders;
    string respBody;

    string _url(url);
    string _method(method);
    string _proxy(proxy);

    struct curl_slist *headerList = NULL;
    string headerString;

    curl_easy_setopt(curl, CURLOPT_URL, url); //set url
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
    for (auto header=headers.begin(); header!=headers.end(); ++header) { //make header list
        headerString = header->first;
        headerString.append(": ").append(header->second);
        headerList = curl_slist_append(headerList, headerString.c_str()); 
        //cout << headerString << '\n';
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
    if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    if (_proxy != "") //set proxy
        curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
    if (timeout != 0) //set timeout
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects

    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
    //curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
    //??
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body

    curl_easy_perform(curl); //send the request

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code

    struct responseStruct response;
    response.statusCode = statusCode;
    //response.headers;
    response.body = respBody.c_str();

    curl_easy_cleanup(curl);

    return response;
}

static void *AddRequest( void *arguments ) {
    // get arguments:
      struct requestStruct *args = (struct requestStruct*)arguments;
      int id = args->id; 
      const char* url = args->url; 
      const char* method = args->method; 
      const char* body = args->body; 
      map<const char*, const char*> headers = args->headers; 
      const char* proxy = args->proxy; 
      int timeout = args->timeout;

    // print arguments:
      //while (printing) {} //wait for other threads to stop printing
      //printing = true; //tell other threads to not print anything
      //  cout << id << endl << url << endl << method << endl;
      //printing = false; //tell the other threads it's okay to print again now

    struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);

    ResponseCallback(id,&response);

    pthread_exit(0);
    return NULL;
}

int main() {
    //map<const char*, const char*> headers;
    //headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    //struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
    //cout << response.body << endl;

    pthread_t threads[3];
    struct requestStruct reqArguments[3];

    map<const char*, const char*> headers;
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    const char* proxy = "";

    reqArguments[0].id = 0;
    reqArguments[0].url = "https://www.facebook.com/";
    reqArguments[0].method = "GET";
    reqArguments[0].headers = headers;
    reqArguments[0].body = "";
    reqArguments[0].proxy = proxy;
    reqArguments[0].timeout = 6000;
    pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[1].id = 1;
    reqArguments[1].url = "https://www.facebook.com/";
    reqArguments[1].method = "GET";
    reqArguments[1].headers = headers;
    reqArguments[1].body = "";
    reqArguments[1].proxy = proxy;
    reqArguments[1].timeout = 6000;
    pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[2].id = 2;
    reqArguments[2].url = "https://www.facebook.com/";
    reqArguments[2].method = "GET";
    reqArguments[2].headers = headers;
    reqArguments[2].body = "";
    reqArguments[2].proxy = proxy;
    reqArguments[2].timeout = 6000;
    pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments

    getchar(); //prevent console from closing instantly
    return 0;
}

我不确定我是否正确地做了整个pthread事情。
有一些问题:
1.出于某种原因,只有第一个请求成功,其他请求甚至没有发送 除非我取消注释主函数的前4行,它将在没有新线程的情况下直接请求,但我显然不想使用该代码。
2. HttpRequest()函数没有正确返回响应html代码,I only receive garbage 我认为问题2可能是与HttpRequest()的返回结构相关的指针问题,但我无法解决它。 :(
3.我的最后一个而不是那么重要的问题是我不知道如何接收响应标题并将它们放在地图中 顺便说一句:我正在使用Visual C ++ 2010编译,我正在使用Fiddler调试http流量。

1 个答案:

答案 0 :(得分:2)

编辑:这是我更正的代码。

没有真正的错误。但经过几次测试后,我发现在同一时间启动几个curl_perform会导致问题。所以我添加了一个延迟(5000ms很大,你可以减少它)。

并且pthread_exit()导致问题出现响应错误。

#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl/curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;

bool printing = false; //will allow us to prevent prints overlapping each other
#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WINDOWS__) || defined(__TOS_WIN__)

  #include <windows.h>

  inline void delay( unsigned long ms )
    {
    Sleep( ms );
    }

#else  /* presume POSIX */

  #include <unistd.h>

  inline void delay( unsigned long ms )
    {
    usleep( ms * 1000 );
    }

#endif 


struct requestStruct { //will allow us to pass more than one argument to the threaded functions
    int id;
    const char* url;
    const char* method;
    const char* body;
    map<const char*, const char*> headers;
    const char* proxy;
    int timeout;
};

struct responseStruct { //will allow us to return more than one value from the Request function
    long statusCode;
    //map<const char*, const char*> headers;
    const char* body;
};

size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
    ((string*)stream)->append((char*)ptr, 0, size* count);
    return size* count;
}

static void *ResponseCallback(int id, struct responseStruct *response) {
    long statusCode = response -> statusCode;
    //map<const char*, const char*> headers = response -> headers;
    const char* body = response -> body;

    //while (printing) {} //wait for other threads to stop printing
    printing = true; //tell other threads to not print anything
      cout << id << " response received! Code: " << statusCode << endl << body << endl;
    printing = false; //tell other threads printing is okay again
    return NULL;
}

struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
    CURL *curl;
    curl = curl_easy_init();

    long statusCode = 0;
    map<const char*, const char*> respHeaders;
    string respBody;

    string _url(url);
    string _method(method);
    string _proxy(proxy);

    struct curl_slist *headerList = NULL;
    string headerString;

    curl_easy_setopt(curl, CURLOPT_URL, url); //set url
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
    for (std::map<const char*, const char*>::iterator header=headers.begin(); header!=headers.end(); ++header) { //make header list
        headerString = header->first;
        headerString.append(": ").append(header->second);
        headerList = curl_slist_append(headerList, headerString.c_str()); 
        //cout << headerString << '\n';
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
    if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    if (_proxy != "") //set proxy
        curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
    if (timeout != 0) //set timeout
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects

    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
    //curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
    //??
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body
    static int i=0;
    delay(5000*(i++));
    std::cout << "url: " << _url << ";" << std::endl;

    curl_easy_perform(curl); //send the request

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code

    struct responseStruct response;
    response.statusCode = statusCode;
    //response.headers;
    response.body = respBody.c_str();

    curl_easy_cleanup(curl);

    return response;
}

static void *AddRequest( void *arguments ) {
    // get arguments:
      struct requestStruct *args = (struct requestStruct*)arguments;
      int id = args->id; 
      const char* url = args->url; 
      const char* method = args->method; 
      const char* body = args->body; 
      map<const char*, const char*> headers = args->headers; 
      const char* proxy = args->proxy; 
      int timeout = args->timeout;

    // print arguments:
      //while (printing) {} //wait for other threads to stop printing
      //printing = true; //tell other threads to not print anything
      //  cout << id << endl << url << endl << method << endl;
      //printing = false; //tell the other threads it's okay to print again now

      struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);

    ResponseCallback(id,&response);

    /* this code cause trouble (no response code) */
    //pthread_exit(0);
    return NULL;
}

int main() {
    //map<const char*, const char*> headers;
    //headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    //struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
    //cout << response.body << endl;

    pthread_t threads[3];
    struct requestStruct reqArguments[3];

    map<const char*, const char*> headers;
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    const char* proxy = "";

    reqArguments[0].id = 0;
    reqArguments[0].url = "https://www.duckduckgo.com/";
    reqArguments[0].method = "GET";
    reqArguments[0].headers = headers;
    reqArguments[0].body = "";
    reqArguments[0].proxy = proxy;
    reqArguments[0].timeout = 6000;
    pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[1].id = 1;
    reqArguments[1].url = "https://www.google.com/";
    reqArguments[1].method = "GET";
    reqArguments[1].headers = headers;
    reqArguments[1].body = "";
    reqArguments[1].proxy = proxy;
    reqArguments[1].timeout = 6000;
    pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[2].id = 2;
    reqArguments[2].url = "https://www.facebook.com/";
    reqArguments[2].method = "GET";
    reqArguments[2].headers = headers;
    reqArguments[2].body = "";
    reqArguments[2].proxy = proxy;
    reqArguments[2].timeout = 6000;
    pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments

    //        getchar();
    // that is cleaner
    for (int i=0; i<3; ++i) {
      int rc = pthread_join(threads[i], NULL);
      printf("In main: thread %d is complete\n", i);
    }

    return 0;
}

关于标题的最后一个问题,请在stackoverflow上发布另一个问题。因为还有很多科目(我认为)。

一点建议,使用对象更容易编写和阅读代码源。

结束编辑

这是使用libcurl进行多线程的官方示例的副本: http://curl.haxx.se/libcurl/c/multithread.html

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/ 
/* A multi-threaded example that uses pthreads extensively to fetch
 * X remote files at once */ 

#include <stdio.h>
#include <pthread.h>
#include <curl/curl.h>

#define NUMT 4

/*
  List of URLs to fetch.

  If you intend to use a SSL-based protocol here you MUST setup the OpenSSL
  callback functions as described here:

  http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION

*/ 
const char * const urls[NUMT]= {
  "http://curl.haxx.se/",
  "ftp://cool.haxx.se/",
  "http://www.contactor.se/",
  "www.haxx.se"
};

static void *pull_one_url(void *url)
{
  CURL *curl;

  curl = curl_easy_init();
  curl_easy_setopt(curl, CURLOPT_URL, url);
  curl_easy_perform(curl); /* ignores error */ 
  curl_easy_cleanup(curl);

  return NULL;
}


/*
   int pthread_create(pthread_t *new_thread_ID,
   const pthread_attr_t *attr,
   void * (*start_func)(void *), void *arg);
*/ 

int main(int argc, char **argv)
{
  pthread_t tid[NUMT];
  int i;
  int error;

  /* Must initialize libcurl before any threads are started */ 
  curl_global_init(CURL_GLOBAL_ALL);

  for(i=0; i< NUMT; i++) {
    error = pthread_create(&tid[i],
                           NULL, /* default attributes please */ 
                           pull_one_url,
                           (void *)urls[i]);
    if(0 != error)
      fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i, error);
    else
      fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
  }

  /* now wait for all threads to terminate */ 
  for(i=0; i< NUMT; i++) {
    error = pthread_join(tid[i], NULL);
    fprintf(stderr, "Thread %d terminated\n", i);
  }

  return 0;
}

为了交互式使用,您可以将urls数组转换为矢量。

我希望它能帮到你!