PHP:如何消除RollingCurl库中任何挂起的curl_multi请求

时间:2012-12-17 18:36:06

标签: php curl curl-multi

我使用的是 http://code.google.com/p/rolling-curl/ 的略微修改版本。

我的问题与此类似:http://code.google.com/p/rolling-curl/issues/detail?id=25

基本上,我会发出一批请求,一旦找到了我想要的那个响应,所有其他待处理的请求都应该从队列中移除,并且任何正在运行的cURL句柄都应该从内存中释放,即使它们尚未完成下载。

代码在这里:

<?php
/*
Authored by Josh Fraser (www.joshfraser.com)
Released under Apache License 2.0

Maintained by Alexander Makarov, http://rmcreative.ru/

$Id$
*/

/**
 * Value holder describing one queued curl request.
 *
 * Every constructor argument is stored verbatim in the public property of the
 * same name; no validation or normalisation is performed here.
 */
class RollingCurlRequest {
    public $url = false;
    public $method = 'GET';
    public $post_data = null;
    public $headers = null;
    public $options = null;
    public $extras = null;

    /**
     * Store the description of a request.
     *
     * @param string $url       Target URL
     * @param string $method    HTTP method name
     * @param mixed  $post_data Request body, if any
     * @param mixed  $headers   Headers for this request
     * @param mixed  $options   Extra curl options for this request
     * @param mixed  $extras    Arbitrary caller data kept alongside the request
     * @return void
     */
    function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null, $extras = null) {
        $this->extras = $extras;
        $this->options = $options;
        $this->headers = $headers;
        $this->post_data = $post_data;
        $this->method = $method;
        $this->url = $url;
    }

    /**
     * Drop the request fields when the object is destroyed.
     *
     * Note that $extras is not part of the unset list.
     *
     * @return void
     */
    public function __destruct() {
        unset($this->url);
        unset($this->method);
        unset($this->post_data);
        unset($this->headers);
        unset($this->options);
    }
}

/**
 * Exception type raised by RollingCurl.
 *
 * Adds nothing to Exception; it exists so callers can catch RollingCurl
 * failures separately from other exceptions.
 */
class RollingCurlException extends Exception
{
}

/**
 * Class that holds a rolling queue of curl requests.
 *
 * Requests are queued with add()/request()/get()/post() and run by execute().
 * With more than one queued request, execute() keeps up to $window_size
 * transfers attached to a curl multi handle and passes every finished
 * transfer to the configured callback.
 *
 * @throws RollingCurlException
 */
class RollingCurl {
    /**
     * @var int
     *
     * Window size is the max number of simultaneous connections allowed.
     *
     * REMEMBER TO RESPECT THE SERVERS:
     * Sending too many requests at one time can easily be perceived
     * as a DOS attack. Increase this window_size if you are making requests
     * to multiple servers or have permission from the receiving server admins.
     */
    private $window_size = 5;

    /**
     * @var float
     *
     * Timeout is the timeout used for curl_multi_select.
     */
    private $timeout = 10;

    /**
     * @var string|array|callable
     *
     * Callback function to be applied to each result.
     */
    private $callback;

    /**
     * @var array
     *
     * Set your base options that you want to be used with EVERY request.
     *
     * NOTE(review): CURLOPT_SSL_VERIFYPEER is 0 here, i.e. TLS peer
     * verification is off unless a caller overrides it through the options
     * property or per-request options. Kept as-is for compatibility.
     */
    protected $options = array(
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_ENCODING => "gzip"
    );

    /**
     * @var array
     *
     * Headers sent with every request.
     */
    private $headers = array();

    /**
     * @var RollingCurlRequest[]
     *
     * The request queue
     */
    private $requests = array();

    /**
     * @var array
     *
     * Maps a key derived from each active curl handle (see handle_key())
     * to the index of its request in the queue.
     */
    public $requestMap = array();

    /**
     * @param  $callback
     * Callback function to be applied to each result.
     *
     * Can be specified as 'my_callback_function'
     * or array($object, 'my_callback_method').
     *
     * Function should take three parameters: $response, $info, $request.
     * $response is response body, $info is additional curl info.
     * $request is the original request
     *
     * @return void
     */
    public function __construct($callback = null) {
        $this->callback = $callback;
    }

    /**
     * Read a property that is not accessible from the calling scope.
     *
     * @param string $name
     * @return mixed The property value, or null when it is unset or null
     */
    public function __get($name) {
        return (isset($this->{$name})) ? $this->{$name} : null;
    }

    /**
     * Write a property that is not accessible from the calling scope.
     *
     * "options" and "headers" are combined with the current value using the
     * array union operator, so on a key collision the newly supplied entry is
     * the one that is kept. Any other property is simply overwritten.
     *
     * @param string $name
     * @param mixed $value
     * @return bool
     */
    public function __set($name, $value) {
        // append the base options & headers
        if ($name == "options" || $name == "headers") {
            $this->{$name} = $value + $this->{$name};
        } else {
            $this->{$name} = $value;
        }
        return true;
    }

    /**
     * Add a request to the request queue
     *
     * @param RollingCurlRequest $request
     * @return bool
     */
    public function add($request) {
        $this->requests[] = $request;
        return true;
    }

    /**
     * Create new Request and add it to the request queue
     *
     * @param string $url
     * @param string $method
     * @param  $post_data
     * @param  $headers
     * @param  $options
     * @param  $extras Optional caller data stored on the request object
     * @return bool
     */
    public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null, $extras = null) {
        $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options, $extras);
        return true;
    }

    /**
     * Perform GET request
     *
     * @param string $url
     * @param  $headers
     * @param  $options
     * @return bool
     */
    public function get($url, $headers = null, $options = null) {
        return $this->request($url, "GET", null, $headers, $options);
    }

    /**
     * Perform POST request
     *
     * @param string $url
     * @param  $post_data
     * @param  $headers
     * @param  $options
     * @return bool
     */
    public function post($url, $post_data = null, $headers = null, $options = null) {
        return $this->request($url, "POST", $post_data, $headers, $options);
    }

    /**
     * Execute processing
     *
     * A queue holding exactly one request is run with a plain curl_exec();
     * anything else goes through the multi-handle loop, which throws when the
     * effective window is smaller than 2 (this includes an empty queue).
     *
     * @param int $window_size Max number of simultaneous connections
     * @throws RollingCurlException
     * @return string|bool
     */
    public function execute($window_size = null) {
        // rolling curl window must always be greater than 1
        if (sizeof($this->requests) == 1) {
            return $this->single_curl();
        } else {
            // start the rolling curl. window_size is the max number of simultaneous connections
            return $this->rolling_curl($window_size);
        }
    }

    /**
     * Performs a single curl request
     *
     * The request is removed from the queue. When a callback is configured
     * the method returns true (invoking the callback if it is callable);
     * without a callback the curl_exec() result is returned.
     *
     * @access private
     * @return string|bool
     */
    private function single_curl() {
        $ch = curl_init();
        $request = array_shift($this->requests);
        $options = $this->get_options($request);
        curl_setopt_array($ch, $options);
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);

        // everything needed has been read; release the handle before user code runs
        $this->close_handle($ch);

        // it's not necessary to set a callback for one-off requests
        if (!$this->callback) {
            return $output;
        }

        $callback = $this->callback;
        if (is_callable($callback)) {
            call_user_func($callback, $output, $info, $request);
        }
        return true;
    }

    /**
     * Performs multiple curl requests
     *
     * @access private
     * @throws RollingCurlException
     * @param int $window_size Max number of simultaneous connections
     * @return bool
     */
    private function rolling_curl($window_size = null) {
        if ($window_size)
            $this->window_size = $window_size;

        // make sure the rolling window isn't greater than the # of urls
        if (sizeof($this->requests) < $this->window_size)
            $this->window_size = sizeof($this->requests);

        if ($this->window_size < 2) {
            throw new RollingCurlException("Window size must be greater than 1");
        }

        $master = curl_multi_init();

        // start the first batch of requests
        for ($i = 0; $i < $this->window_size; $i++) {
            $this->start_request($master, $i);
        }

        do {
            // set when a handle is attached during this pass; see loop condition
            $started = false;

            while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
            if ($execrun != CURLM_OK)
                break;
            // a request was just completed -- find out which one
            while ($done = curl_multi_info_read($master)) {

                // get the info and content returned on the request
                $info = curl_getinfo($done['handle']);
                $output = curl_multi_getcontent($done['handle']);

                // resolve the request and always drop the map entry, callback or not
                $key = $this->handle_key($done['handle']);
                $request = null;
                if (isset($this->requestMap[$key])) {
                    $index = $this->requestMap[$key];
                    if (isset($this->requests[$index])) {
                        $request = $this->requests[$index];
                    }
                    unset($this->requestMap[$key]);
                }

                // send the return values to the callback function.
                $callback = $this->callback;
                if (is_callable($callback)) {
                    call_user_func($callback, $output, $info, $request);
                }

                // start a new request (it's important to do this before removing the old one)
                if ($i < count($this->requests) && isset($this->requests[$i])) {
                    $this->start_request($master, $i);
                    $i++;
                    $started = true;
                }

                // remove the curl handle that just completed
                curl_multi_remove_handle($master, $done['handle']);
                $this->close_handle($done['handle']);
            }

            // Block for data in / output; error handling is done by curl_multi_exec
            if ($running)
                curl_multi_select($master, $this->timeout);

            // $running was reported by curl_multi_exec() before the handles
            // above were attached, so it can be 0 while fresh transfers are
            // still waiting to be driven; $started keeps the loop alive for them.
        } while ($running || $started);
        curl_multi_close($master);
        return true;
    }

    /**
     * Create a curl handle for the queued request at $index, attach it to
     * the multi handle and record it in $requestMap.
     *
     * @access private
     * @param mixed $master Multi handle returned by curl_multi_init()
     * @param int $index Position of the request in the queue
     * @return void
     */
    private function start_request($master, $index) {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[$index]));
        curl_multi_add_handle($master, $ch);

        // Add to our request Maps
        $this->requestMap[$this->handle_key($ch)] = $index;
    }

    /**
     * Build the $requestMap key for a curl handle.
     *
     * Resource handles keep the historical "(string) $ch" key. Object handles
     * cannot be cast to string, so their key is built from spl_object_id().
     *
     * @access private
     * @param mixed $ch Curl easy handle
     * @return string
     */
    private function handle_key($ch) {
        if (is_resource($ch)) {
            return (string) $ch;
        }
        return 'CurlHandle #' . spl_object_id($ch);
    }

    /**
     * Close a curl easy handle when it is a resource.
     *
     * Object handles are left to normal object destruction.
     *
     * @access private
     * @param mixed $ch Curl easy handle
     * @return void
     */
    private function close_handle($ch) {
        if (is_resource($ch)) {
            curl_close($ch);
        }
    }

    /**
     * Helper function to set up a new request by setting the appropriate options
     *
     * Precedence: per-request options override the base options; the URL and
     * POST fields are then set from the request. Headers configured on this
     * object are sent first, followed by the request's own headers.
     * $request->method is not consulted: a POST is configured only when
     * $request->post_data is non-empty.
     *
     * @access private
     * @param RollingCurlRequest $request
     * @return array
     */
    private function get_options($request) {
        // options for this entire curl object
        $options = $this->__get('options');
        if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
            $options[CURLOPT_FOLLOWLOCATION] = 1;
            $options[CURLOPT_MAXREDIRS] = 5;
        }
        $headers = $this->__get('headers');

        // headers given for this specific request are sent after the shared ones
        if (isset($request->headers) && is_array($request->headers) && $request->headers) {
            $headers = array_merge(is_array($headers) ? $headers : array(), $request->headers);
        }

        // append custom options for this specific request
        if ($request->options) {
            $options = $request->options + $options;
        }

        // set the request URL
        $options[CURLOPT_URL] = $request->url;

        // posting data w/ this request?
        if ($request->post_data) {
            $options[CURLOPT_POST] = 1;
            $options[CURLOPT_POSTFIELDS] = $request->post_data;
        }
        if ($headers) {
            $options[CURLOPT_HEADER] = 0;
            $options[CURLOPT_HTTPHEADER] = $headers;
        }

        return $options;
    }

    /**
     * @return void
     */
    public function __destruct() {
        unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
    }
}

编辑:第二个想法,应该有两个功能:

1)删除队列中的所有请求,并在不完成的情况下从内存中取消设置任何curl资源。

2)删除队列中的所有请求,但让当前正在运行的curl资源全部执行完成。

我可能会将我在此处获得的功能/代码/建议提供给所选答案的作者。

1 个答案:

答案 0 :(得分:1)

嗯,这是一个非常简单,没有花哨的补丁。

我创建了一个名为cancelRequests()的新函数,它接受一个bool参数,用来指示是取消所有请求(包括正在处理的请求),还是只清空requests数组的内容(在这种情况下,当前正在下载的请求仍会执行完成)。

以下示例使用已包含的example.php文件的更改版本来显示此工作:

<?php

require("RollingCurl.php");

/**
 * Build the response callback for a RollingCurl-like object.
 *
 * The returned closure calls $rc->cancelRequests() and then prints the curl
 * info, the request and an "<hr>" separator. The $response body itself is
 * not printed.
 *
 * @param object $rc Object exposing cancelRequests()
 * @return Closure Callback taking ($response, $info, $request)
 */
function create_request_callback($rc) {
    $onResponse = function ($response, $info, $request) use ($rc) {
        // The first response to arrive discards every other request
        $rc->cancelRequests();

        echo print_r($info, true), print_r($request, true), "<hr>";
    };

    return $onResponse;
}

// The two Google pages are small, the two archives are large: whichever
// response is handled first cancels the rest through the callback.
$urls = array(
    "http://www.google.com",
    "http://download.thinkbroadband.com/1GB.zip",
    "http://download.thinkbroadband.com/512MB.zip",
    "http://www.google.co.uk"
);

// The object has to exist before its callback can be built, because the
// callback closes over the object itself.
$rc = new RollingCurl();
$rc->callback = create_request_callback($rc);
$rc->window_size = 20;

// Queue one request per URL
foreach ($urls as $url) {
    $request = new RollingCurlRequest($url);
    $rc->add($request);
}

$rc->execute();

正如您所看到的,有必要创建RollingCurl对象,然后再传递回调。原因是能够在回调中引用对象实例。

我使用了rv版svn存储库中的RollingCurl.php文件。

这是差异:

81c81
<     private $callback;
---
>     public $callback;
100a101,114
>      * @var Master[]
>      *
>      * The master handler
>      */
>     public $master = null;
> 
>     /**
>      * @var RequesList[]
>      *
>      * The request list of handles
>      */
>     public $requestList = array();
> 
>     /**
165a180,195
>      * Cancel all requests from the queue
>      *
>      * @param bool $all Delete ALL requests, including the ones currently executing
>      * @return bool
>      */ 
>     public function cancelRequests($all = true) {
>         $this->requests = array();
>         if($all) {
>             foreach($this->requestList as $handler) {
>                 curl_multi_remove_handle($this->master, $handler);
>             }
>         }
>         return true;
>     }
> 
>     /**
267c297
<         $master = curl_multi_init();
---
>         $this->master = curl_multi_init();
276c306,307
<             curl_multi_add_handle($master, $ch);
---
>             curl_multi_add_handle($this->master, $ch);
>             array_push($this->requestList, $ch);
284c315
<             while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
---
>             while (($execrun = curl_multi_exec($this->master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
288c319
<             while ($done = curl_multi_info_read($master)) {
---
>             while ($done = curl_multi_info_read($this->master)) {
308c339,340
<                     curl_multi_add_handle($master, $ch);
---
>                     curl_multi_add_handle($this->master, $ch);
>                     array_push($this->requestList, $ch);
317c349
<                 curl_multi_remove_handle($master, $done['handle']);
---
>                 curl_multi_remove_handle($this->master, $done['handle']);
323c355
<                 curl_multi_select($master, $this->timeout);
---
>                 curl_multi_select($this->master, $this->timeout);
326c358
<         curl_multi_close($master);
---
>         curl_multi_close($this->master);
相关问题