使用cURL,PHP和Twitter而不使用API​​的逻辑

时间:2017-07-03 22:05:02

标签: php curl twitter

我有这段代码,它将cookies保存在.txt文件中,并通过Twitter验证用户

<?php

require_once 'class/Cookies.php';

$cookie = [];

$username = 'formUser';
$password = 'formPass';

$index_url = 'https://twitter.com';

$token = curl_init();
curl_setopt_array($token, [
      CURLOPT_URL             => $index_url,
      CURLOPT_CUSTOMREQUEST   => 'GET',
      CURLOPT_RETURNTRANSFER  => true,
      CURLOPT_SSL_VERIFYPEER  => false,
      CURLOPT_SSL_VERIFYHOST  => 2,
      CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
      //CURLOPT_COOKIEFILE      => __DIR__ . DIRECTORY_SEPARATOR . 'cookies' . DIRECTORY_SEPARATOR . $username . '.txt',
      CURLOPT_COOKIEJAR       => __DIR__ . DIRECTORY_SEPARATOR . 'cookies' . DIRECTORY_SEPARATOR . $username . '.txt',
      CURLOPT_COOKIESESSION   => true,
      CURLOPT_REFERER         => $index_url,
      CURLOPT_HEADER          => true,
      CURLOPT_HTTPHEADER      => ['Cookie:' . http_build_query($cookie, '', ';') . ';'],
      CURLOPT_HEADERFUNCTION => function ($curl, $header) use (&$cookie) {
        if (stripos($header, 'Set-Cookie:') === 0) {
          if (preg_match('/Set-Cookie:\s?(.*?)=(.*?);/i', $header, $matches)) {
            $cookie[$matches[1]] = urldecode($matches[2]);
          }
        }
        return strlen($header);
      }
    ]
);    
$access = curl_exec($token);

preg_match('/value="(.*?)" name="authenticity_token"/', $access, $matches);

$authenticity_token = $matches[1];

//var_dump($authenticity_token);

$session_post = "session[username_or_email]=$username&session[password]=$password&return_to_ssl=true&scribe_log=&redirect_after_login=%2F&authenticity_token=$authenticity_token";



$session_url = 'https://twitter.com/sessions';

curl_setopt_array($token, [
      CURLOPT_URL             => $session_url,
      CURLOPT_CUSTOMREQUEST   => 'POST',
      CURLOPT_POSTFIELDS      => $session_post,
      CURLOPT_RETURNTRANSFER  => true,
      CURLOPT_HTTPHEADER      => [
        "Content-type: application/x-www-form-urlencoded",
        'Cookie: '. http_build_query($cookie, '', ';').';'
      ],
      CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
      CURLOPT_HEADER          => true,
      CURLOPT_FOLLOWLOCATION  => true,
      CURLOPT_MAXREDIRS       => 2,
      CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
      CURLOPT_POSTREDIR       => 2,
      CURLOPT_AUTOREFERER     => 1
  ]

);
$auth = curl_exec($token);

var_dump($cookie);

if (isset($cookie['auth_token']))
{
  $twid = filter_var($cookie['twid'], FILTER_SANITIZE_NUMBER_INT);

  Cookies::set('login_token', $cookie['ct0']);
  Cookies::set('kdt', $cookie['kdt']);
  Cookies::set('user_id', $twid);
  Cookies::set('auth_token', $cookie['auth_token']);
  Cookies::set('username', $username);


  echo json_encode(array(
    "status"      => "success",
    "message"     => "Authentication successful, we are redirecting you.",
  ));
}
else
{
  echo json_encode(
    array(
      "status" => "error",
      'message'=> "Unable to authenticate with Twitter.",
    ));
}

此代码捕获登录用户的信息:

<?php

$username = 'sessionUser';

$url = 'https://twitter.com/' . $username;

$user = curl_init();
curl_setopt_array($user, [
      CURLOPT_URL             => $url,
      CURLOPT_CUSTOMREQUEST   => 'GET',
      CURLOPT_CAINFO          => 'cacert-2017-06-07.pem',
      CURLOPT_RETURNTRANSFER  => true,
      CURLOPT_SSL_VERIFYPEER  => false,
      CURLOPT_SSL_VERIFYHOST  => 2,
      CURLOPT_HTTPHEADER      => [
        "Content-type:text/html;charset=utf-8",
      ],
      CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
      CURLOPT_HEADER          => true,
      CURLOPT_FOLLOWLOCATION  => true,
      CURLOPT_MAXREDIRS       => 2,
      CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
      CURLOPT_POSTREDIR       => 2,
      CURLOPT_AUTOREFERER     => 1,
      CURLOPT_ENCODING        => "gzip"
  ]
);

$user_info = curl_exec($user);

$header_size = curl_getinfo($user, CURLINFO_HEADER_SIZE);
$header = substr($user_info, 0, $header_size);
$body = substr($user_info, $header_size);

$dom = new DOMDocument("5.0", "utf-8");
@$dom->loadHTML($body);

$data = json_decode($dom->getElementById("init-data")->getAttribute("value"));

echo "Nome: ", $data->profile_user->id, PHP_EOL;
echo "Nome: ", $data->profile_user->name, PHP_EOL;
echo "Usuário: ", $data->profile_user->screen_name, PHP_EOL;
echo "Foto de perfil: ", $data->profile_user->profile_image_url, PHP_EOL;

我需要帮助,以便保存在.txt或数据库中的用户的cookie可以让关注者进行交换。

我该怎么做?

修改

谁是downvote,留下评论。

编辑2

档案follow.php

<?php

require_once '../modules/config.php';
require_once '../modules/class/Cookies.php';


$username = Cookies::get('username');

$friend_url = 'https://api.twitter.com/1.1/friendships/create.json';

$friend = curl_init();

curl_setopt_array($friend, [
        CURLOPT_URL             => $friend_url,
        CURLOPT_SSL_VERIFYPEER  => 1,
        CURLOPT_SSL_VERIFYHOST  => 2,
        CURLOPT_CAINFO          => ROOT . 'modules' . SEPARATOR . 'cacert' . SEPARATOR . 'cacert-2017-06-07.pem',
        CURLOPT_CUSTOMREQUEST   => 'POST',
        CURLOPT_POSTFIELDS      => 'screen_name=' . $username,
        CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_HTTPHEADER      => [
            "Content-type: application/json; charset=utf-8",
      ],
        CURLOPT_HEADER          => true,
    ]

);

$response  = curl_exec($friend);

var_dump($response);

响应:

C:\wamp64\www\brfollow\api\follow.php:32:string 'HTTP/1.1 400 Bad Request
content-length: 62
content-type: application/json; charset=utf-8
date: Fri, 07 Jul 2017 08:09:54 GMT
server: tsa_d
set-cookie: guest_id=v1%3A149941499419523606; Domain=.twitter.com; Path=/; Expires=Sun, 07-Jul-2019 08:09:54 UTC
strict-transport-security: max-age=631138519
x-connection-hash: 9e951d1215095efa246c5b852acd2e8a
x-response-time: 131
x-tsa-request-body-time: 0

{"errors":[{"code":215,"message":"Bad Authentication data."}]}' (length=472)

3 个答案:

答案 0 :(得分:2)

你必须使用

CURLOPT_COOKIEJAR       => __DIR__ . DIRECTORY_SEPARATOR . 'cookies' . DIRECTORY_SEPARATOR . $username . '.txt',

在你的第二个请求中。

答案 1 :(得分:2)

首先注意您现有的代码: 不要对GETPOST请求使用CURLOPT_CUSTOMREQUEST。对于GET,请使用CURLOPT_HTTPGET=>true(另请注意,GET是libcurl的默认请求),对于POST请求,请使用CURLOPT_POST=>true

此行preg_match('/value="(.*?)" name="authenticity_token"/', $access, $matches); 如果他们在值和名称之间添加任何其他属性,将会中断, 如果他们只是将名称移到价值后面,它甚至会破裂,它会破裂  如果他们在评论(<!-- --> - 样式)中放置一个simmilar字符串,如果他们只是在值和名称之间放置另一个空格,它甚至会中断,   and parsing HTML with regex is generally a bad idea

一种更强大的方法是:

$authenticity_token=(new DOMXpath(@DOMDocument::loadHTML($access)))->query("//input[@name='authenticity_token']")->item(0)->getAttribute("value");

在这一行中,你犯了同样的错误3次:

$session_post = "session[username_or_email]=$username&session[password]=$password&return_to_ssl=true&scribe_log=&redirect_after_login=%2F&authenticity_token=$authenticity_token";

你没有urlencode $ username,$ password和$ authenticity_token。这意味着,如果其中任何一个包含application/x-www-urlencoded格式中具有特殊含义的任何字符,服务器将获取错误的数据(包括空格,&=,{{1} },[ÆØ以及许多其他字符),简单的解决方案是在它们上使用urlencode() - 漂亮的解决方案是使用http_build_query来制作字符串,如下所示:

Å

你也犯了同样的错误:

$session_post = http_build_query ( array (
        'session' => array (
                'username_or_email' => $username,
                'password' => $password 
        ),
        'return_to_sssl' => true,
        'scribe_log' => '',
        'redirect_after_login' => '/',
        'authenticity_token' => $authenticity_token 
) );

并且必须错误添加此行:

    CURLOPT_POSTFIELDS      => 'screen_name=' . $username,

它是一个没有请求体的GET请求,因此没有 CURLOPT_HTTPHEADER => [ "Content-type:text/html;charset=utf-8", ], ,因为没有内容,所以内容类型头部声明不应该在那里,摆脱它。

这一行

content-type
如果curl没有用gzip编译,

将破坏你的代码,并且服务器实际上决定使用gzip(更具体地说,你将得到难以理解的二进制数据),并且你没有提供实际处理gzip二进制数据的代码。一个更强大的方法是将它设置为emptystring CURLOPT_ENCODING => "gzip" ,然后curl将提供libcurl编译的所有编码,并将动态对其进行解码(包括gzip,如果编译成。通常""gzip,但它也具有前瞻性,因为它会自动添加任何未来的编码)

这一行

deflate

不要自动添加此标头。 libcurl将自动检测 "Content-type: application/x-www-form-urlencoded", application/x-www-urlencoded编码,并自动设置相应的multipart/form-data标头。与你不同的是,libcurl不会犯这样的错别字。

现在,下一步是获取所有当前关注者,并发送关注请求。你说你不想使用api,但是没有办法避免它(没有黑客攻击Twitter数据库,ofc),甚至tiwtter的javascript的“关注”按钮使用api。好消息是,你可以使用twitter的javascript的api令牌逃脱,因此不必使用你自己的令牌。这在理论上听起来很容易,但实际上并非如此。从来没有,这是一个使用hhb_curl(来自https://github.com/divinity76/hhb_.inc.php/blob/master/hhb_.inc.php)的示例实现,获取您的关注者列表,并使用twitters自己的api密钥(自动提取)向每个关注者发送跟随请求:

content-type

输出: <?php declare(strict_types = 1); require_once ('hhb_.inc.php'); const USERNAME = '???'; const PASSWORD = '???'; $hc = new hhb_curl ( 'https://twitter.com/login', true ); $hc->exec (); // get csrf token $csrf_token = [ ]; preg_match ( '/\s+ct0\s*=\s*(.*?)\;/', implode ( "\n", $hc->getResponseHeaders () ), $csrf_token ); if (count ( $csrf_token ) !== 2) { throw new Exception ( 'failed to extract the csrf token!' ); } $csrf_token = $csrf_token [1]; // to log in... $html = $hc->getStdOut (); $domd = @DOMDocument::loadHTML ( $html ); $inputs = getDOMDocumentFormInputs ( $domd, true ) [0]; // << not sure why, but they have 6 seemingly duplicate login forms. the first 1 works fine. $inputs = DOMInputsToArray ( $inputs ); $inputs ['session[username_or_email]'] = USERNAME; $inputs ['session[password]'] = PASSWORD; // hhb_var_dump ( $inputs ) & die (); $html = $hc->setopt_array ( array ( CURLOPT_POST => true, CURLOPT_POSTFIELDS => http_build_query ( $inputs ), CURLOPT_URL => 'https://twitter.com/sessions' ) )->exec ()->getResponseBody (); $domd = @DOMDocument::loadHTML ( $html ); $xpath = new DOMXPath ( $domd ); // hhb_var_dump ( $hc->getStdErr (), $hc->getStdOut () ); if (false !== stripos ( $hc->getinfo ( CURLINFO_EFFECTIVE_URL ), 'login/error' )) { throw new Exception ( 'failed to login!' ); } echo "logged in!", PHP_EOL; // now to get the api key $js = $hc->exec ( 'https://abs.twimg.com/k/en/init.en.c5a67fc1f42cedcdbbcd.js' )->getResponseBody (); // hhb_var_dump ( $hc->getStdErr (), $hc->getStdOut () ) & die (); // fragile regex: assumes that there's only 1x i="114 characters"; , and that the api key is exactly 114 characters. preg_match ( '/i\s*\=\s*\"([^\"]{114})\"\s*\;/iu', $js, $matches ); // hhb_var_dump ( $matches ) & die (); if (count ( $matches ) !== 2) { throw new RuntimeException ( 'failed to extract the api auth key!' ); } $api_auth_key = $matches [1]; $myurl = 'https://twitter.com/' . ltrim ( $xpath->query ( '//a[contains(@class,\'DashboardProfileCard\')]' )->item ( 0 )->getAttribute ( "href" ), '/' ); echo 'myurl: ' . $myurl . PHP_EOL; // $myurl = 'https://twitter.com/scaleway'; $myurl .= '/followers'; $html = $hc->exec ( $myurl )->getResponseBody (); // hhb_var_dump ( $hc->getStdErr (), $hc->getStdOut () ) & die (); $toFollow = array (); $domd = @DOMDocument::loadHTML ( $html ); $xpath = new DOMXPath ( $domd ); foreach ( $xpath->query ( '//div[contains(@class,\'ProfileCard-content\')]' ) as $followerDiv ) { $name = $xpath->query ( './/*[@data-screen-name]', $followerDiv )->item ( 0 )->getAttribute ( "data-screen-name" ); $user_id = $xpath->query ( './/*[@data-user-id]', $followerDiv )->item ( 0 )->getAttribute ( "data-user-id" ); echo "following " . $name . ' (' . $user_id . ')' . PHP_EOL; try { $hc->setopt_array ( array ( CURLOPT_CUSTOMREQUEST => 'OPTIONS', CURLOPT_URL => 'https://api.twitter.com/1.1/friendships/create.json', CURLOPT_HTTPHEADER => array ( 'Access-Control-Request-Method: POST', 'Access-Control-Request-Headers: authorization,x-csrf-token,x-twitter-active-user,x-twitter-auth-type', 'DNT: 1', 'Origin: https://twitter.com' ) ) )->exec (); } catch ( Throwable $ex ) { // there is a bug where it sometimes respond http 200 OK, but with 0 bytes content. hhb_curl doesn't like this, as 0-bytes-responses should actually be http 201. // feel free to contact twitter with a bugreport. } // hhb_var_dump ( $hc->getStdErr () ); $hc->setopt ( CURLOPT_CUSTOMREQUEST, NULL ); $hc->setopt_array ( array ( CURLOPT_POST => true, CURLOPT_URL => 'https://api.twitter.com/1.1/friendships/create.json', CURLOPT_POSTFIELDS => http_build_query ( array ( 'challenges_passed' => 'false', 'handles_challenges' => '1', 'impression_id' => '', 'include_blocked_by' => 'true', 'include_blocking' => 'true', 'include_can_dm' => 'true', 'include_followed_by' => 'true', 'include_mute_edge' => 'true', 'skip_status' => 'true', 'user_id' => $user_id ) ), CURLOPT_HTTPHEADER => array ( 'Accept: application/json, text/javascript, */*; q=0.01', 'Accept-Language: en-US,en;q=0.5', 'Authorization: Bearer ' . $api_auth_key, 'x-twitter-auth-type: OAuth2Session', 'x-csrf-token: ' . $csrf_token, 'X-Twitter-Active-User: yes', 'DNT: 1', 'Origin: https://twitter.com', 'Referer: ' . $myurl ) ) )->exec (); // parse_str ( $hc->getopt ( CURLOPT_POSTFIELDS ), $fields ); // hhb_var_dump ( $fields, $hc->getStdErr (), $hc->getStdOut () ) & die (); } // hhb_var_dump ( $myurl ); function DOMInputsToArray($inputs): array { $ret = [ ]; foreach ( $inputs as $in ) { if ($in->hasAttribute ( "disabled" )) { continue; } $name = $in->getAttribute ( "name" ); if (empty ( $name )) { continue; } $ret [$name] = $in->getAttribute ( "value" ); } return $ret; } function getDOMDocumentFormInputs(\DOMDocument $domd, bool $getOnlyFirstMatches = false): array { // :DOMNodeList? $forms = $domd->getElementsByTagName ( 'form' ); $parsedForms = array (); $isDescendantOf = function (\DOMNode $decendant, \DOMNode $ele): bool { $parent = $decendant; while ( NULL !== ($parent = $parent->parentNode) ) { if ($parent === $ele) { return true; } } return false; }; // i can't use array_merge on DOMNodeLists :( $merged = function () use (&$domd): array { $ret = array (); foreach ( $domd->getElementsByTagName ( "input" ) as $input ) { $ret [] = $input; } foreach ( $domd->getElementsByTagName ( "textarea" ) as $textarea ) { $ret [] = $textarea; } return $ret; }; $merged = $merged (); foreach ( $forms as $form ) { $inputs = function () use (&$domd, &$form, &$isDescendantOf, &$merged): array { $ret = array (); foreach ( $merged as $input ) { // hhb_var_dump ( $input->getAttribute ( "name" ), $input->getAttribute ( "id" ) ); if ($input->hasAttribute ( "disabled" )) { // ignore disabled elements? continue; } $name = $input->getAttribute ( "name" ); if ($name === '') { // echo "inputs with no name are ignored when submitted by mainstream browsers (presumably because of specs)... follow suite?", PHP_EOL; continue; } if (! $isDescendantOf ( $input, $form ) && $form->getAttribute ( "id" ) !== '' && $input->getAttribute ( "form" ) !== $form->getAttribute ( "id" )) { // echo "this input does not belong to this form.", PHP_EOL; continue; } if (! array_key_exists ( $name, $ret )) { $ret [$name] = array ( $input ); } else { $ret [$name] [] = $input; } } return $ret; }; $inputs = $inputs (); // sorry about that, Eclipse gets unstable on IIFE syntax. $hasName = true; $name = $form->getAttribute ( "id" ); if ($name === '') { $name = $form->getAttribute ( "name" ); if ($name === '') { $hasName = false; } } if (! $hasName) { $parsedForms [] = array ( $inputs ); } else { if (! array_key_exists ( $name, $parsedForms )) { $parsedForms [$name] = array ( $inputs ); } else { $parsedForms [$name] [] = $tmp; } } } unset ( $form, $tmp, $hasName, $name, $i, $input ); if ($getOnlyFirstMatches) { foreach ( $parsedForms as $key => $val ) { $parsedForms [$key] = $val [0]; } unset ( $key, $val ); foreach ( $parsedForms as $key1 => $val1 ) { foreach ( $val1 as $key2 => $val2 ) { $parsedForms [$key1] [$key2] = $val2 [0]; } } } return $parsedForms; } (我在^ C手动取消它,因为我不想跟随所有人,但它足以证明它有效) - 并且不要忘记在第4行和第4行替换用户名/密码。第5行 - 并注意,由于我没有任何关注者,我使用https://twitter.com/scaleway/followers作为一个有很多粉丝的测试页面,你可以看到我在注释掉的第50行伪造了网址。 - 同样,它可能只是向你可以在关注者页面上看到的所有关注者发送请求,如果你有很多关注者,这不是完整的列表(这会使你的浏览器崩溃等),所以你必须找到如果你有大量的粉丝,如何获得完整的粉丝列表 -

答案 2 :(得分:1)

这违反了Twitter的开发者政策,您的应用程序和IP风险将被禁止进入该平台。此外,Twitter的规则明确禁止您正在构建的应用程序 - 请参阅https://support.twitter.com/articles/20171936