我使用的是 http://code.google.com/p/rolling-curl/
的略微修改版本。我的问题与此类似:http://code.google.com/p/rolling-curl/issues/detail?id=25
基本上,我会发起一批请求;一旦找到了我想要的那个响应,队列中所有其他待处理的请求都应该被终止,并且所有正在运行的cURL句柄都应该从内存中释放,即使它们尚未完成下载。
代码在这里:
<?php
/*
Authored by Josh Fraser (www.joshfraser.com)
Released under Apache License 2.0
Maintained by Alexander Makarov, http://rmcreative.ru/
$Id$
*/
/**
 * Plain data holder describing one curl request queued on RollingCurl.
 *
 * Every field is public so that RollingCurl (and the user callback, which
 * receives the request object back) can read it directly.
 */
class RollingCurlRequest {
    /** @var string|bool Target URL; false until one is assigned. */
    public $url = false;
    /** @var string HTTP method name supplied by the caller. */
    public $method = 'GET';
    /** @var mixed Request body; null when there is nothing to post. */
    public $post_data = null;
    /** @var array|null Headers supplied for this request. */
    public $headers = null;
    /** @var array|null Curl options supplied for this request. */
    public $options = null;
    /** @var mixed Arbitrary caller data carried along with the request. */
    public $extras = null;

    /**
     * @param string $url
     * @param string $method
     * @param mixed $post_data
     * @param array|null $headers
     * @param array|null $options
     * @param mixed $extras Free-form payload stored untouched on the request
     * @return void
     */
    public function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null, $extras = null) {
        $this->url = $url;
        $this->method = $method;
        $this->post_data = $post_data;
        $this->headers = $headers;
        $this->options = $options;
        $this->extras = $extras;
    }

    /**
     * Drops every field held by the request, including $extras (which the
     * previous implementation left behind).
     *
     * @return void
     */
    public function __destruct() {
        unset(
            $this->url,
            $this->method,
            $this->post_data,
            $this->headers,
            $this->options,
            $this->extras
        );
    }
}
/**
 * Exception type used by RollingCurl to report that it cannot run the
 * queue (for example when the effective window size is below 2).
 *
 * Adds nothing to the base Exception; it exists so callers can catch
 * RollingCurl failures specifically.
 */
class RollingCurlException extends Exception
{
}
/**
 * Holds a queue of curl requests and runs them through curl_multi with a
 * bounded number of simultaneous transfers (the "rolling window").
 *
 * Each finished transfer is reported to the configured callback as
 * ($response, $info, $request).
 *
 * @throws RollingCurlException
 */
class RollingCurl {
    /**
     * @var int
     *
     * Window size is the max number of simultaneous connections allowed.
     *
     * REMEMBER TO RESPECT THE SERVERS:
     * Sending too many requests at one time can easily be perceived
     * as a DOS attack. Increase this window_size if you are making requests
     * to multiple servers or have permission from the receiving server admins.
     */
    private $window_size = 5;

    /**
     * @var float
     *
     * Timeout passed to curl_multi_select.
     */
    private $timeout = 10;

    /**
     * @var string|array|callable
     *
     * Callback function to be applied to each result.
     */
    private $callback;

    /**
     * @var array
     *
     * Base options that are used with EVERY request.
     *
     * NOTE(review): CURLOPT_SSL_VERIFYPEER => 0 turns off TLS certificate
     * verification for every request. It is kept to preserve existing
     * behaviour; override it through the "options" property if needed.
     */
    protected $options = array(
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_ENCODING => "gzip"
    );

    /**
     * @var array
     *
     * Base headers sent with every request.
     */
    private $headers = array();

    /**
     * @var RollingCurlRequest[]
     *
     * The request queue
     */
    private $requests = array();

    /**
     * @var array
     *
     * Maps a key derived from each active curl handle to the index of its
     * request in the queue.
     */
    public $requestMap = array();

    /**
     * @param $callback
     * Callback function to be applied to each result.
     *
     * Can be specified as 'my_callback_function'
     * or array($object, 'my_callback_method').
     *
     * Function should take three parameters: $response, $info, $request.
     * $response is response body, $info is additional curl info.
     * $request is the original request
     *
     * @return void
     */
    function __construct($callback = null) {
        $this->callback = $callback;
    }

    /**
     * Read access to non-public properties from outside the class.
     *
     * @param string $name
     * @return mixed The property value, or null when it is not set
     */
    public function __get($name) {
        return (isset($this->{$name})) ? $this->{$name} : null;
    }

    /**
     * Write access to non-public properties from outside the class.
     *
     * "options" and "headers" are combined with what is already configured
     * instead of replacing it; every other property is overwritten.
     *
     * @param string $name
     * @param mixed $value
     * @return bool
     */
    public function __set($name, $value) {
        if ($name == "options") {
            // Options are keyed by CURLOPT_* integers: the union keeps those
            // keys intact and lets the newly supplied values win.
            $this->options = $value + $this->options;
        } elseif ($name == "headers") {
            // Headers are a plain list. A union would silently drop entries
            // whose numeric index collides, so append instead.
            $this->headers = array_merge($this->headers, $value);
        } else {
            $this->{$name} = $value;
        }
        return true;
    }

    /**
     * Add a request to the request queue
     *
     * @param RollingCurlRequest $request
     * @return bool
     */
    public function add($request) {
        $this->requests[] = $request;
        return true;
    }

    /**
     * Create new Request and add it to the request queue
     *
     * @param string $url
     * @param string $method
     * @param $post_data
     * @param $headers
     * @param $options
     * @return bool
     */
    public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
        $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options);
        return true;
    }

    /**
     * Queue a GET request
     *
     * @param string $url
     * @param $headers
     * @param $options
     * @return bool
     */
    public function get($url, $headers = null, $options = null) {
        return $this->request($url, "GET", null, $headers, $options);
    }

    /**
     * Queue a POST request
     *
     * @param string $url
     * @param $post_data
     * @param $headers
     * @param $options
     * @return bool
     */
    public function post($url, $post_data = null, $headers = null, $options = null) {
        return $this->request($url, "POST", $post_data, $headers, $options);
    }

    /**
     * Execute processing
     *
     * A queue holding exactly one request is run with a plain curl handle;
     * anything else goes through the rolling window.
     *
     * @throws RollingCurlException
     * @param int $window_size Max number of simultaneous connections
     * @return string|bool
     */
    public function execute($window_size = null) {
        // rolling curl window must always be greater than 1
        if (sizeof($this->requests) == 1) {
            return $this->single_curl();
        }
        // start the rolling curl. window_size is the max number of simultaneous connections
        return $this->rolling_curl($window_size);
    }

    /**
     * Performs a single curl request
     *
     * Takes the first request off the queue. When a callback is configured
     * it is invoked (if callable) and true is returned; otherwise the
     * curl_exec() result is returned.
     *
     * @access private
     * @return string|bool
     */
    private function single_curl() {
        $ch = curl_init();
        $request = array_shift($this->requests);
        curl_setopt_array($ch, $this->get_options($request));
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);
        // Everything needed has been read; release the handle.
        $this->close_handle($ch);

        // it's not necessary to set a callback for one-off requests
        if (!$this->callback) {
            return $output;
        }
        if (is_callable($this->callback)) {
            call_user_func($this->callback, $output, $info, $request);
        }
        return true;
    }

    /**
     * Performs multiple curl requests
     *
     * @access private
     * @throws RollingCurlException
     * @param int $window_size Max number of simultaneous connections
     * @return bool
     */
    private function rolling_curl($window_size = null) {
        if ($window_size)
            $this->window_size = $window_size;

        // The window cannot exceed the number of queued requests. Clamp a
        // local copy so the configured window_size survives for later runs.
        $window = min($this->window_size, sizeof($this->requests));
        if ($window < 2) {
            throw new RollingCurlException("Window size must be greater than 1");
        }

        $master = curl_multi_init();

        // start the first batch of requests
        for ($i = 0; $i < $window; $i++) {
            $this->start_request($master, $i);
        }

        do {
            while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
            if ($execrun != CURLM_OK)
                break;

            $started = false;

            // a request was just completed -- find out which one
            while ($done = curl_multi_info_read($master)) {
                $handle = $done['handle'];

                // get the info and content returned on the request
                $info = curl_getinfo($handle);
                $output = curl_multi_getcontent($handle);

                // Resolve the originating request and always drop the map
                // entry, whether or not a callback is configured.
                $key = $this->handle_key($handle);
                $request = null;
                if (isset($this->requestMap[$key])) {
                    $index = $this->requestMap[$key];
                    unset($this->requestMap[$key]);
                    // The callback of an earlier result may have emptied the queue.
                    if (isset($this->requests[$index])) {
                        $request = $this->requests[$index];
                    }
                }

                // send the return values to the callback function.
                $callback = $this->callback;
                if (is_callable($callback)) {
                    call_user_func($callback, $output, $info, $request);
                }

                // start a new request (it's important to do this before removing the old one)
                if ($i < count($this->requests) && isset($this->requests[$i])) {
                    $this->start_request($master, $i);
                    $i++;
                    $started = true;
                }

                // remove the curl handle that just completed and free it
                curl_multi_remove_handle($master, $handle);
                $this->close_handle($handle);
            }

            if ($started) {
                // $running was reported before the new handles were added.
                // Force another pass so they are executed even when every
                // previously active transfer has already finished.
                $running = true;
            } elseif ($running) {
                // Block for data in / output; error handling is done by curl_multi_exec.
                // A -1 result means nothing could be waited on; pause briefly
                // instead of spinning.
                if (curl_multi_select($master, $this->timeout) === -1) {
                    usleep(1000);
                }
            }
        } while ($running);

        curl_multi_close($master);
        return true;
    }

    /**
     * Creates a curl handle for the queued request at $index, attaches it to
     * the multi handle and records it in the request map.
     *
     * @access private
     * @param resource|object $master curl_multi handle
     * @param int $index Position of the request in the queue
     * @return void
     */
    private function start_request($master, $index) {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[$index]));
        curl_multi_add_handle($master, $ch);
        // Add to our request Maps
        $this->requestMap[$this->handle_key($ch)] = $index;
    }

    /**
     * Builds the request-map key for a curl handle.
     *
     * Resource handles keep the original string-cast key. Object handles
     * (PHP 8 returns CurlHandle objects, which cannot be cast to string)
     * are keyed by their object id.
     *
     * @access private
     * @param resource|object $ch
     * @return string
     */
    private function handle_key($ch) {
        if (is_object($ch)) {
            return 'CurlHandle#' . spl_object_id($ch);
        }
        return (string) $ch;
    }

    /**
     * Frees a finished curl handle.
     *
     * Only resource handles need an explicit curl_close(); object handles
     * are released once the last reference to them goes away.
     *
     * @access private
     * @param resource|object $ch
     * @return void
     */
    private function close_handle($ch) {
        if (is_resource($ch)) {
            curl_close($ch);
        }
    }

    /**
     * Helper function to set up a new request by setting the appropriate options
     *
     * Precedence: base options, then the request's own options, then the
     * URL / POST / header entries derived from the request.
     *
     * @access private
     * @param RollingCurlRequest $request
     * @return array
     */
    private function get_options($request) {
        // options for this entire curl object
        $options = $this->__get('options');
        if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
            $options[CURLOPT_FOLLOWLOCATION] = 1;
            $options[CURLOPT_MAXREDIRS] = 5;
        }

        // Base headers first, followed by the headers given for this
        // specific request (these used to be ignored entirely).
        $headers = $this->__get('headers');
        if ($request->headers) {
            $headers = array_merge((array) $headers, (array) $request->headers);
        }

        // append custom options for this specific request
        if ($request->options) {
            $options = $request->options + $options;
        }

        // set the request URL
        $options[CURLOPT_URL] = $request->url;

        // posting data w/ this request?
        if ($request->post_data) {
            $options[CURLOPT_POST] = 1;
            $options[CURLOPT_POSTFIELDS] = $request->post_data;
        }

        if ($headers) {
            $options[CURLOPT_HEADER] = 0;
            $options[CURLOPT_HTTPHEADER] = $headers;
        }

        return $options;
    }

    /**
     * Drops the configuration and queue held by this instance.
     *
     * @return void
     */
    public function __destruct() {
        unset(
            $this->window_size,
            $this->timeout,
            $this->callback,
            $this->options,
            $this->headers,
            $this->requests,
            $this->requestMap
        );
    }
}
编辑:第二个想法,应该有两个功能:
1)删除队列中的所有请求,并在不完成的情况下从内存中取消设置任何curl资源。
2)删除队列中所有尚未开始的请求,但让已经在运行的curl资源继续执行直到完成。
我可能会将我在此处获得的功能/代码/建议提供给所选答案的作者。
答案 0 :(得分:1)
嗯,这是一个非常简单,没有花哨的补丁。
我创建了一个名为cancelRequests()
的新函数,它接受一个bool参数,以指示是否应取消所有请求(包括正在处理的请求)或
只删除requests
数组的内容(在这种情况下,它将完成当前的下载请求)
以下示例使用已包含的example.php
文件的更改版本来显示此工作:
<?php
require("RollingCurl.php");
/**
 * Builds the per-response callback for the given RollingCurl instance.
 *
 * The returned closure first cancels every other request on $rc, then
 * prints the curl info and the request, followed by an <hr> separator.
 *
 * @param object $rc Object exposing cancelRequests()
 * @return Closure Callback taking ($response, $info, $request)
 */
function create_request_callback($rc) {
    $onResponse = function ($response, $info, $request) use ($rc) {
        // First finished transfer wins: drop everything else.
        $rc->cancelRequests();

        echo print_r($info, true);
        echo print_r($request, true);
        echo "<hr>";
    };

    return $onResponse;
}
// The two Google pages download almost immediately, while the two large
// files take much longer. Whichever response arrives first triggers the
// callback, which cancels all remaining requests.
$urls = array(
    "http://www.google.com",
    "http://download.thinkbroadband.com/1GB.zip",
    "http://download.thinkbroadband.com/512MB.zip",
    "http://www.google.co.uk"
);

// The instance has to exist before the callback is built, because the
// callback captures it in order to call cancelRequests().
$rc = new RollingCurl();
$rc->window_size = 20;
$rc->callback = create_request_callback($rc);

foreach ($urls as $url) {
    $rc->add(new RollingCurlRequest($url));
}

$rc->execute();
正如您所看到的,必须先创建RollingCurl
对象,然后再设置回调。原因是这样才能在回调中引用该对象实例。
我使用了rv版svn存储库中的RollingCurl.php
文件。
这是差异:
81c81
< private $callback;
---
> public $callback;
100a101,114
> * @var Master[]
> *
> * The master handler
> */
> public $master = null;
>
> /**
> * @var RequesList[]
> *
> * The request list of handles
> */
> public $requestList = array();
>
> /**
165a180,195
> * Cancel all requests from the queue
> *
> * @param bool $all Delete ALL requests, including the ones currently executing
> * @return bool
> */
> public function cancelRequests($all = true) {
> $this->requests = array();
> if($all) {
> foreach($this->requestList as $handler) {
> curl_multi_remove_handle($this->master, $handler);
> }
> }
> return true;
> }
>
> /**
267c297
< $master = curl_multi_init();
---
> $this->master = curl_multi_init();
276c306,307
< curl_multi_add_handle($master, $ch);
---
> curl_multi_add_handle($this->master, $ch);
> array_push($this->requestList, $ch);
284c315
< while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
---
> while (($execrun = curl_multi_exec($this->master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
288c319
< while ($done = curl_multi_info_read($master)) {
---
> while ($done = curl_multi_info_read($this->master)) {
308c339,340
< curl_multi_add_handle($master, $ch);
---
> curl_multi_add_handle($this->master, $ch);
> array_push($this->requestList, $ch);
317c349
< curl_multi_remove_handle($master, $done['handle']);
---
> curl_multi_remove_handle($this->master, $done['handle']);
323c355
< curl_multi_select($master, $this->timeout);
---
> curl_multi_select($this->master, $this->timeout);
326c358
< curl_multi_close($master);
---
> curl_multi_close($this->master);