我正在尝试登录某个网站,然后调用大量网址来获取源并抓取图片。它使用常规curl工作正常,但当我尝试使用multi_curl时,我得到了完全相同的响应。所以我只需要在我恢复curl资源时登录(这可以正常卷曲),我认为这可能是它返回相同响应的原因。
有没有人知道如何使用multi_curl但首先进行身份验证?
以下是我正在使用的代码:
<?php
// LICENSE: PUBLIC DOMAIN
// The author disclaims copyright to this source code.
// AUTHOR: Shailesh N. Humbad
// SOURCE: http://www.somacon.com/p539.php
// DATE: 6/4/2008
// index.php
// Run the parallel get and print the total time
$s = microtime(true);
// Define the URLs
$urls = array(
"http://localhost/r.php?echo=request1",
"http://localhost/r.php?echo=request2",
"http://localhost/r.php?echo=request3"
);
$pg = new ParallelGet($urls);
print "<br />total time: ".round(microtime(true) - $s, 4)." seconds";
// Class to run parallel GET requests and return the transfer
class ParallelGet
{
function __construct($urls)
{
// Create get requests for each URL
$mh = curl_multi_init();
$count = 0;
$ch = curl_init();
foreach($urls as $i => $url)
{
$count++;
if($count == 1)
{
// SET URL FOR THE POST FORM LOGIN
curl_setopt($ch, CURLOPT_URL, 'https://www.example.com/login.php');
// ENABLE HTTP POST
curl_setopt ($ch, CURLOPT_POST, 1);
// SET POST PARAMETERS : FORM VALUES FOR EACH FIELD
curl_setopt ($ch, CURLOPT_POSTFIELDS, 'user=myuser&password=mypassword');
// IMITATE CLASSIC BROWSER'S BEHAVIOUR : HANDLE COOKIES
curl_setopt ($ch, CURLOPT_COOKIEJAR, realpath($_SERVER['DOCUMENT_ROOT']) . '/cookie.txt');
# Setting CURLOPT_RETURNTRANSFER variable to 1 will force cURL
# not to print out the results of its query.
# Instead, it will return the results as a string return value
# from curl_exec() instead of the usual true/false.
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
// EXECUTE 1st REQUEST (FORM LOGIN)
curl_exec ($ch);
}
$ch = curl_init($url);
curl_setopt ($ch, CURLOPT_COOKIEFILE, realpath($_SERVER['DOCUMENT_ROOT']) . '/cookie.txt');
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
$ch_array[$i] = $ch;
curl_multi_add_handle($mh, $ch_array[$i]);
}
// Start performing the request
do {
$execReturnValue = curl_multi_exec($mh, $runningHandles);
} while ($execReturnValue == CURLM_CALL_MULTI_PERFORM);
// Loop and continue processing the request
while ($runningHandles && $execReturnValue == CURLM_OK) {
// Wait forever for network
$numberReady = curl_multi_select($mh);
if ($numberReady != -1) {
// Pull in any new data, or at least handle timeouts
do {
$execReturnValue = curl_multi_exec($mh, $runningHandles);
} while ($execReturnValue == CURLM_CALL_MULTI_PERFORM);
}
}
// Check for any errors
if ($execReturnValue != CURLM_OK) {
trigger_error("Curl multi read error $execReturnValue\n", E_USER_WARNING);
}
// Extract the content
foreach($urls as $i => $url)
{
// Check for errors
$curlError = curl_error($ch_array[$i]);
if($curlError == "") {
$res[$i] = curl_multi_getcontent($ch_array[$i]);
} else {
print "Curl error on handle $i: $curlError\n";
}
// Remove and close the handle
curl_multi_remove_handle($mh, $ch_array[$i]);
curl_close($ch_array[$i]);
}
// Clean up the curl_multi handle
curl_multi_close($mh);
// Print the response data
print_r($res);
}
}
?>
答案 0 :(得分:0)
您还需要启用/使用curl的cookie。在文档中查找它,不要忘记创建具有curl读写权限的cookie(空文件)。
$cookie = tempnam ("/tmp", "CURLCOOKIE"); $ch = curl_init(); curl_setopt( $ch, CURLOPT_URL, $url ); curl_setopt( $ch, CURLOPT_COOKIEJAR, $cookie );