使用 cURL 时遇到的一些问题

时间:2011-02-08 10:04:56

标签: php curl

    <?php
// Google account credentials sent as the Email / Passwd fields of the login
// POST further down. Both are left blank in this listing.
$email    = '';
$password = '';


/**
 * Return the text found between two delimiters.
 *
 * Locates the first occurrence of $string_start, then the first occurrence
 * of $string_end that comes AFTER it, and returns what lies in between.
 *
 * @param string $string_to_search Text to scan.
 * @param string $string_start     Delimiter that precedes the wanted text.
 * @param string $string_end       Delimiter that follows the wanted text.
 * @return string|false The enclosed text, or false when the start delimiter
 *                      is missing or no end delimiter follows it.
 */
function searchstring($string_to_search,$string_start,$string_end)
{
    $start_pos = strpos($string_to_search, $string_start);
    if ($start_pos === false) {
        return false;
    }

    // Skip past the start delimiter itself.
    $start = $start_pos + strlen($string_start);

    // The end delimiter only counts when it appears after the start
    // delimiter; an occurrence earlier in the string must not be accepted,
    // otherwise substr() would be called with a negative length.
    $end = strpos($string_to_search, $string_end, $start);
    if ($end === false) {
        return false;
    }

    return substr($string_to_search, $start, $end - $start);
}

/**
 * Perform an HTTP GET on an existing cURL handle and return the response.
 *
 * Redirects are not followed (CURLOPT_FOLLOWLOCATION is 0), and cookies are
 * both read from and written to $cookiefile.
 *
 * @param resource $ch         Handle created by curl_init(); may be reused
 *                             across calls.
 * @param string   $url        URL to request.
 * @param string   $cookiefile Path of the cookie jar file.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function curl_get($ch, $url, $cookiefile) {
    // The handle is shared with curl_post(), which leaves CURLOPT_POST
    // enabled. Without resetting the method here, every request made after
    // a POST would be sent as a POST again with the old fields.
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT,"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13");
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiefile);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiefile);
    //curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    $data = curl_exec($ch);
    return $data;
}

/**
 * Perform an HTTP POST on an existing cURL handle and return the response.
 *
 * Redirects are not followed, and cookies are both read from and written to
 * $cookiefile. SSL peer verification is not touched here.
 *
 * @param resource     $ch         Handle created by curl_init().
 * @param string       $url        URL to post to.
 * @param string       $cookiefile Path of the cookie jar file.
 * @param array|string $post       Value handed to CURLOPT_POSTFIELDS as-is.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function curl_post( $ch, $url, $cookiefile, $post) {
    // Option table, applied in this exact order.
    $settings = array(
        CURLOPT_URL            => $url,
        CURLOPT_POST           => 1,
        CURLOPT_POSTFIELDS     => $post,
        CURLOPT_FOLLOWLOCATION => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13",
        CURLOPT_COOKIEJAR      => $cookiefile,
        CURLOPT_COOKIEFILE     => $cookiefile,
    );

    foreach ($settings as $option => $value) {
        curl_setopt($ch, $option, $value);
    }

    return curl_exec($ch);
}

// Cookie jar shared by every request in this script.
$cookiefile = "cookie.txt";

// One handle is reused for the whole session.
$ch = curl_init();

// Load the login form so the GALX token embedded in it can be read.
$data = curl_get($ch, "https://www.google.com/accounts/ServiceLogin?uilel=3&service=youtube&passive=true&continue=http%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26nomobiletemp%3D1%26hl%3Den_US%26next%3D%252Findex&hl=en_US&ltmpl=sso", $cookiefile);
if ($data === false) {
    exit('Could not load the login page: ' . curl_error($ch));
}

// Extract the GALX token; without it the login POST cannot be built, so
// stop here instead of reading an undefined $galx[1].
if (!preg_match('/name="GALX"\s*value="(.*?)"/', $data, $galx)) {
    exit('GALX token not found on the login page.');
}

//login
$thegalx = $galx[1];

// Form fields for the login POST. The original literal listed 'ltmpl'
// three times with the same value; a PHP array keeps a single entry per
// key, so it is written once here (same position, same value).
$post_elements = array(
    'ltmpl'            => 'sso',
    'continue'         => 'http://www.youtube.com/signin?action_handle_signin=true&nomobiletemp=1&hl=en_US&next=%2Findex',
    'next'             => '/',
    'service'          => 'youtube',
    'uilel'            => 3,
    'hl'               => 'en_US',
    'GALX'             => $thegalx,
    'Email'            => $email,
    'Passwd'           => $password,
    'PersistentCookie' => 'yes',
    'rmShown'          => 1,
    'signIn'           => 'Sign in',
    'asts'             => false,
);

$login = curl_post($ch, "https://www.google.com/accounts/ServiceLoginAuth", $cookiefile, $post_elements);
if ($login === false) {
    exit('Login request failed: ' . curl_error($ch));
}






// Request the CheckCookie page; its body is expected to contain a
// location.replace("...") call holding the next URL to visit.
$data = curl_get($ch, "https://www.google.com/accounts/CheckCookie?continue=http%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26nomobiletemp%3D1%26hl%3Den_US%26next%3D%252Findex&hl=en_US&service=youtube&ltmpl=sso&chtml=LoginDoneHtml", $cookiefile);

// Pull the raw redirect target out of the page. A failed transfer or a
// missing location.replace() call yields false; stop instead of requesting
// an empty URL.
$rawauthurl = ($data === false) ? false : searchstring($data,'location.replace("','"');
if ($rawauthurl === false) {
    exit('Auth redirect URL not found in the CheckCookie response.');
}

// Turn "\xNN" escapes into "%NN", then URL-decode and HTML-entity-decode.
$authurl=html_entity_decode(urldecode(str_replace('\x', '%', $rawauthurl)));
//print_r($authurl);

//hit auth url to login
curl_get($ch, $authurl, $cookiefile);


// Disabled experiment, kept for reference: requesting a watch page this way
// did not work.
//$ch = curl_init();
//$data = curl_get($ch, "http://www.youtube.com/watch?v=VBqVfmWM1og", $cookiefile);
//print $data;

// Request the YouTube home page on the same handle and cookie file, then
// dump whatever came back.
$homepage = curl_get($ch, "http://www.youtube.com/", $cookiefile);
print_r($homepage);

// Done with the shared handle.
curl_close($ch);

?>

您好,

我已经能够通过抓取 GALX 值成功登录 YouTube。当我打印出代码底部请求的 http://youtube.com 的内容时,页面显示正常,并且显示我已登录。但是,如果我打印视频网址的内容(代码中已注释掉的部分),页面却只是一片空白。这快让我发疯了,有人知道为什么会这样吗?

1 个答案:

答案 0 :(得分:0)

我遇到过的问题通常出在请求头(headers)、POST 数据和 Cookie 上。虽然看起来你已经处理好了部分 cookie,但请务必使用 LiveHTTPHeaders(Firefox 的扩展)来嗅探请求,查看浏览器还发送了哪些请求头,以及是否通过隐藏表单字段等方式发送了更多 POST 数据。

浏览器使用的其他 Cookie 也会在其中显示。虽然 cURL 的 CURLOPT_COOKIEJAR 选项会处理大部分 cookie,但在这种自动化场景下,cookie 管理仍然有些棘手。

因此,请确保在登录后在youtube上加载视频页面时仔细检查发送的信息,并将这些字段添加到curl。 祝你好运。