我写了一个PHP脚本,它通过libcurl检索数据并对其进行处理。它工作正常,但出于性能原因,我将其更改为使用了数十个工作者(线程)。性能提高了50多倍,但是现在php.exe每隔几分钟崩溃一次,并且列出的错误模块是php_curl.dll。我之前在C中有过多线程的经验,但之前在php中根本没用过它。
我google了一下,据说cURL是线程安全的(截至2001年): http://curl.haxx.se/mail/lib-2001-01/0001.html 但是我找不到关于php_curl是否是线程安全的提及。
如果重要,我从命令行运行php。我的设置是Win7 x64,PHP 5.5.11线程安全VC11 x86,PHP pthreads 2.0.4 for PHP 5.5线程安全VC11 x86。
这是一些伪代码来展示我在做什么
// Pseudo-code: a Worker subclass whose run() loops forever, calling
// runCURL() and then sleeping for one second on every pass.
class MyWorker extends Worker
{
...
public function run()
{
...
// No exit condition is shown for this loop.
while(1)
{
...
runCURL();
...
// Pause one second before the next iteration.
sleep(1);
}
}
}
// Pseudo-code: performs one request through a cURL handle that is created on
// the first call and kept in a function-static variable for later calls.
function runCURL()
{
static $curlHandle = null;
...
// First call only: create the handle and set the options that never change.
if(is_null($curlHandle))
{
$curlHandle = curl_init();
curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($curlHandle, CURLOPT_USERAGENT, "My User Agent String");
}
// Per-call options. $data and $header are not defined in the visible code;
// presumably they are set in the elided "..." section above — TODO confirm.
curl_setopt($curlHandle, CURLOPT_URL, "The URL");
curl_setopt($curlHandle, CURLOPT_POSTFIELDS, $data);
curl_setopt($curlHandle, CURLOPT_HTTPHEADER, $header);
// NOTE(review): peer certificate verification is switched off here.
curl_setopt($curlHandle, CURLOPT_SSL_VERIFYPEER, false);
$result = curl_exec($curlHandle);
...
}
答案 0 :(得分:8)
首先,pthreads 官方并不支持 resource 类型;curl 句柄正是一种 resource,因此不应把 curl 句柄存储在 pthreads 对象的对象作用域(即成员属性)中,否则它们可能会被损坏。
pthreads 提供了一种使用 worker 的简便方法……
在多个线程中执行任务最简单的方法,是使用 pthreads 内置的 Pool 类:
以下代码演示了如何把一批请求放入池中,由若干后台线程执行:
<?php
/* Create a mutex and expose its handle as the LOG constant; slog() locks it around each write. */
define("LOG", Mutex::create());
/**
 * printf-style logger whose output is serialised through the LOG mutex.
 *
 * The first argument is the format string and every further argument is a
 * value for it. A newline is appended to the format. Nothing is printed when
 * the format is empty or otherwise falsy.
 *
 * @param string $message format string passed to vsprintf()
 * @param mixed  $args    first format value; more may follow as extra arguments
 */
function slog($message, $args = []) {
    /* Re-read the full argument list: the declared parameters only name the first two. */
    $values = func_get_args();
    $format = array_shift($values);
    if (!$format) {
        return;
    }

    Mutex::lock(LOG);
    echo vsprintf($format . "\n", $values);
    Mutex::unlock(LOG);
}
/**
 * A single HTTP request that can be submitted to a Pool.
 *
 * The cURL handle is created, used and closed entirely inside run(), so no
 * resource is stored on the Threaded object itself.
 */
class Request extends Threaded {
    /** Target URL. */
    protected $url;
    /** POST payload; an empty value means no POST body is set. */
    protected $post;

    /**
     * @param string       $url  address to request
     * @param array|string $post optional POST fields
     */
    public function __construct($url, $post = []) {
        $this->url = $url;
        $this->post = $post;
    }

    /**
     * Perform the transfer and log either the response size or the failure.
     */
    public function run() {
        $curl = curl_init();
        if ($curl === false) {
            slog("%s failed: %s", $this->url, "could not initialise cURL");
            return;
        }

        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_URL, $this->url);
        if ($this->post) {
            curl_setopt($curl, CURLOPT_POSTFIELDS, $this->post);
        }

        $response = curl_exec($curl);
        if ($response === false) {
            /* Without this check a failed transfer would be reported as "0 bytes". */
            slog("%s failed: %s", $this->url, curl_error($curl));
        } else {
            slog("%s returned %d bytes", $this->url, strlen($response));
        }

        /* Release the handle explicitly instead of waiting for scope exit. */
        curl_close($curl);
    }

    /** @return string the URL given to the constructor */
    public function getURL() { return $this->url; }

    /** @return array|string the POST payload given to the constructor */
    public function getPost() { return $this->post; }
}
/* Build $max dummy search URLs, each carrying an md5 hash as its query. */
$max = 100;
$urls = [];
for ($index = 0; $index < $max; $index++) {
    $seed = mt_rand() * $index;
    $urls[] = sprintf("http://www.google.co.uk/?q=%s", md5($seed));
}

/* Submit one Request per URL to a pool created with size 4. */
$pool = new Pool(4);
foreach ($urls as $url) {
    $request = new Request($url);
    $pool->submit($request);
}

/* Shut the pool down, then destroy the logging mutex. */
$pool->shutdown();
Mutex::destroy(LOG);
?>
您的具体任务还需要对取回的数据进行处理;您可以把这部分功能写进类似上面的设计中……或者
Promise 是一种非常精巧的并发形式……
Promise 很契合此处任务的性质:先发出请求,然后处理响应。
以下代码展示了如何使用 pthreads/promises 发出同样的请求并处理响应:
<?php
namespace {
require_once("vendor/autoload.php");
use pthreads\PromiseManager;
use pthreads\Promise;
use pthreads\Promisable;
use pthreads\Thenable;
/* Create a mutex and expose its handle as the LOG constant; slog() locks it around each write. */
define("LOG", Mutex::create());
/**
 * printf-style logger whose output is serialised through the LOG mutex.
 *
 * The first argument is the format string and every further argument is a
 * value for it. A newline is appended to the format. Nothing is printed when
 * the format is empty or otherwise falsy.
 *
 * @param string $message format string passed to vsprintf()
 * @param mixed  $args    first format value; more may follow as extra arguments
 */
function slog($message, $args = []) {
    /* Re-read the full argument list: the declared parameters only name the first two. */
    $values = func_get_args();
    $format = array_shift($values);
    if (!$format) {
        return;
    }

    Mutex::lock(LOG);
    echo vsprintf($format . "\n", $values);
    Mutex::unlock(LOG);
}
/**
 * Shared error reporting, mixed into the classes below: logs the error
 * carried by a promisable when one occurs.
 */
trait ErrorManager {
    /**
     * @param Promisable $promised the promisable whose error is reported
     */
    public function onError(Promisable $promised) {
        $error = (string) $promised->getError();
        slog("Oh noes: %s\n", $error);
    }
}
/**
 * Promisable HTTP request: onFulfill() performs the transfer and keeps the
 * curl_exec() result so it can be read back through getResponse().
 */
class Request extends Promisable {
    use ErrorManager;

    /** Target URL. */
    protected $url;
    /** POST payload; an empty value means no POST body is set. */
    protected $post;
    /** Result of curl_exec(), assigned by onFulfill(). */
    protected $response;
    /** Set to true by setGarbage(); has no other assignment in this class. */
    protected $garbage;

    /**
     * @param string       $url  address to request
     * @param array|string $post optional POST fields
     */
    public function __construct($url, $post = []) {
        $this->url = $url;
        $this->post = $post;
        /* NOTE(review): `done` is neither declared nor read in this class;
           presumably the Promisable base uses it — confirm. */
        $this->done = false;
    }

    /**
     * Run the request and store whatever curl_exec() returns.
     */
    public function onFulfill() {
        $handle = curl_init();
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_URL, $this->url);
        if ($this->post) {
            curl_setopt($handle, CURLOPT_POSTFIELDS, $this->post);
        }

        $this->response = curl_exec($handle);
    }

    /** @return string the URL given to the constructor */
    public function getURL() {
        return $this->url;
    }

    /** @return array|string the POST payload given to the constructor */
    public function getPost() {
        return $this->post;
    }

    /** @return mixed the value stored by onFulfill() */
    public function getResponse() {
        return $this->response;
    }

    /** Flag this object as garbage. */
    public function setGarbage() {
        $this->garbage = true;
    }

    /** @return mixed the current value of the garbage flag */
    public function isGarbage() {
        return $this->garbage;
    }
}
/**
 * Thenable that logs the URL and response length of a fulfilled Request.
 */
class Process extends Thenable {
    use ErrorManager;

    /**
     * @param Promisable $request the fulfilled request to report on
     */
    public function onFulfilled(Promisable $request) {
        $url = $request->getURL();
        $size = strlen($request->getResponse());
        slog("%s returned %d bytes\n", $url, $size);
    }
}
/* some dummy urls */
$max = 100;
$urls = [];
while (count($urls) < $max) {
    $urls[] = sprintf(
        "http://www.google.co.uk/?q=%s",
        md5(mt_rand() * count($urls)));
}

/* initialize manager for promises */
$manager = new PromiseManager(4);

/*
 * Create one promise per URL to make and process the request.
 * Iterating the list directly covers every entry, including $urls[0]:
 * a counter pre-incremented from an undefined variable starts at 1 and
 * would skip the first URL.
 */
foreach ($urls as $url) {
    $promise = new Promise($manager, new Request($url, []));
    $promise->then(
        new Process($promise));
}

/* force the manager to shutdown (fulfilling all promises first) */
$manager->shutdown();

/* destroy mutex */
Mutex::destroy(LOG);
}
?>
Composer 配置(composer.json):
{
"require": {
"krakjoe/promises": ">=1.0.2"
}
}
请注意,Request 几乎没有改变:新增的只是一个保存响应的属性,以及一个用于检测对象是否为垃圾的方法。
有关池中垃圾收集的详细信息,适用于这两个示例:
slog 函数仅用于让日志输出保持可读。
pthreads 并不是一个新的 PDO 驱动……
许多人像对待一个新的 PDO 驱动那样对待 pthreads——想当然地认为它和 PHP 的其余部分一样工作,一切都会顺利。
事情未必一切顺利,需要事先研究:我们正在挑战极限,为了保持稳定性,必须对 pthreads 的架构施加一些“限制”,这可能会带来一些奇怪的副作用。
虽然 pthreads 附带了详尽的文档(其中大部分在 PHP 手册中配有示例),但我目前还无法把下面这份文档加入手册。
下面这份文档可以帮助您理解 pthreads 的内部机制,每个人都应该读一读——它就是为您而写的。