我一直尝试使用cURL从这个网站获取一些页面数据。该页面需要登录授权,使用cookie,没有SSL。我查阅了很多手册和PHP cURL脚本的示例,但似乎都不起作用。
每次运行脚本时,cookie文件都会更新,但结果是空字符串。如果我将CURLOPT_FOLLOWLOCATION设置为1,我会得到登录页面。所以我推测,原始脚本返回的是一个指向登录页面的重定向。我也试过调整CURLOPT_USERAGENT和CURLOPT_REFERER,但没有帮助。
另外,如果我手动设置CURLOPT_COOKIE,PHPSESSID(使用浏览器和人工输入的真实登录会话),它可以正常工作。
所以,这是我的代码:
<?php
// Logs in with a form POST, lets cURL persist the session cookie to disk,
// then requests a protected page with that cookie and prints the response.
set_time_limit(10);
define('USERNAME', 'username');
define('PASSWORD', 'password');
define('USER_AGENT', 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36');
define('COOKIE_FILE', 'cookie.txt');
define('LOGIN_FORM_URL', 'http://website/auth');
define('LOGIN_ACTION_URL', 'http://website/distribution/index');

// realpath() returns false when the file does not exist yet, which would hand
// cURL an unusable cookie path on the first run. Fall back to an absolute
// path in the current working directory in that case.
$cookiePath = realpath(COOKIE_FILE) ?: (getcwd() . DIRECTORY_SEPARATOR . COOKIE_FILE);

$postValues = array(
    'login_msisdn' => USERNAME,
    'password' => PASSWORD
);

// Step 1: submit the credentials and store the cookies the server sets.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, LOGIN_FORM_URL);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($postValues));
// NOTE(review): both URLs are plain http://, so the two SSL options below
// should have no effect here; do not copy them to an https:// target.
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookiePath);
//curl_setopt($curl, CURLOPT_COOKIE, "PHPSESSID=relkdrgg94gfdgfg834g");
curl_setopt($curl, CURLOPT_USERAGENT, USER_AGENT);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_REFERER, LOGIN_FORM_URL);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 0);
curl_exec($curl);
if (curl_errno($curl)) {
    // Read the message before closing: the handle is unusable afterwards.
    $message = curl_error($curl);
    curl_close($curl);
    throw new Exception($message);
}
// The cookie jar is written to disk when the handle is closed, so this must
// happen before the second request tries to read the file.
curl_close($curl);

// Step 2: request the protected page, sending the cookies saved in step 1.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, LOGIN_ACTION_URL);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookiePath);
curl_setopt($curl, CURLOPT_USERAGENT, USER_AGENT);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
$html = curl_exec($curl);
if ($html === false) {
    $message = curl_error($curl);
    curl_close($curl);
    throw new Exception($message);
}
curl_close($curl);
echo $html;
答案 0 :(得分:2)
您只是在第一次cURL调用时保存了cookie(使用CURLOPT_COOKIEJAR),但在第二次cURL调用时没有加载它。这就是为什么第二次调用没有使用cookie的原因。请在第二次cURL调用时使用以下内容。
// Load the cookies from COOKIE_FILE so they are sent with this request.
// Note: realpath() returns false if the file does not exist yet.
curl_setopt($curl, CURLOPT_COOKIEFILE, realpath(COOKIE_FILE));
其次,在进行第二次cURL请求之前,您必须关闭cURL句柄并重新初始化它。选项CURLOPT_COOKIEJAR用于将cookie保存到文件中,但它只会在cURL句柄关闭时才写入文件。
// Close the first handle: this is the point at which cURL writes the
// CURLOPT_COOKIEJAR file to disk.
curl_close($curl);
// Start a fresh handle for the second request.
$curl = curl_init();
// here goes the second one
curl_setopt($curl, CURLOPT_URL, LOGIN_ACTION_URL);
此外,正如评论中所建议的那样(我之前遗漏了这一点),请启用选项CURLOPT_RETURNTRANSFER,以便curl返回输出。
// Make curl_exec() return the response as a string instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
另请注意,请使用cookie文件的完整路径。否则,当您从浏览器运行时它可能不起作用(同时确保您对cookie文件具有写入权限)。
// Absolute path, so the cookie file location does not depend on the current
// working directory; the directory must be writable by the PHP process.
define('COOKIE_FILE', '/some/directory/cookie.txt');
答案 1 :(得分:0)
对于使用CURL保存和使用cookie文件,我使用以下代码:
// Create a uniquely named file under /tmp (name prefix "CURLCOOKIE") to hold the cookies.
$ckfile = tempnam('/tmp', 'CURLCOOKIE');
// Use the same file for both directions: JAR is where cookies are written,
// FILE is where they are read from.
curl_setopt($curl, CURLOPT_COOKIEJAR, $ckfile);
curl_setopt($curl, CURLOPT_COOKIEFILE, $ckfile);
答案 2 :(得分:0)
/*
 Login flow and troubleshooting checklist:
 1) Request the main page first, then perform the login.
 2) Some browser-like request headers are added (see get_url).
 3) Check that every parameter the login form submits is present in the
    POST data (e.g. "&login=Submit").
 4) If the site uses HTTP basic authorization, use
    curl_setopt($curl, CURLOPT_USERPWD, "$username:$password");
 5) Debug the response headers / errors returned by get_url.
*/
$url1 = "http://website/";
$url2 = "http://website/auth";
$url3 = "http://website/distribution/index";
$user = "username";
$pass = "password";
// http_build_query() URL-encodes the values, so credentials containing
// characters such as "&", "=", "+" or "%" are transmitted intact.
$post = http_build_query(array('user' => $user, 'pass' => $pass));
// Visit the main page first, so cookies set before login are stored.
get_url($url1,'',$url1);
// Submit the login form; the result is kept for debugging.
$login = get_url($url2,$post,$url1);
// Fetch the protected page using the cookies stored by the earlier requests.
$data = get_url($url3,'',$url1);
print_r($data);
/**
 * Performs one HTTP request with cURL, sharing cookies between calls through
 * a "cookie.txt" file in the current working directory.
 *
 * @param string $url   URL to request.
 * @param string $post  POST body; when empty, no POST options are set.
 * @param string $refer Referer to send; "http://www.google.com/" when empty.
 *
 * @return array 'html'  => response body, or false if the request failed,
 *               'info'  => curl_getinfo() result for the request,
 *               'error' => 'Curl error: ...' on failure, '' otherwise.
 */
function get_url($url,$post,$refer) {
    $ssl = stripos($url, 'https://') === 0;
    $cookie = getcwd().DIRECTORY_SEPARATOR.'cookie.txt';

    // Every entry must be a complete "Name: value" line. The first entry
    // previously lacked the "Accept: " name and was sent as a malformed header.
    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,"
            . "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 300",
        "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        "Accept-Language: en-us,en;q=0.5",
        "Pragma: ",
    );
    $agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36";
    $refer = !empty($refer) ? $refer : "http://www.google.com/";

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, $agent);
    if( !empty($post) ) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
    }
    if( $ssl ) {
        // SECURITY: this disables certificate verification for https:// URLs,
        // which allows man-in-the-middle attacks. Kept for compatibility with
        // existing behavior; prefer configuring a CA bundle instead.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    }
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_REFERER, $refer);
    // Same file for writing and reading, so cookies carry over between calls.
    curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie);
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_TIMEOUT,60);

    $html = curl_exec($curl);
    $info = curl_getinfo($curl);
    $error = '';
    if( $html === false ) {
        $error = 'Curl error: ' . curl_error($curl);
    }
    // Closing the handle also writes the cookie jar file.
    curl_close($curl);

    return array(
        'html' => $html,
        'info' => $info,
        'error' => $error,
    );
}