我正在尝试登录MCA门户网站(POST网址:http://www.mca.gov.in/mcafoportal/loginValidateUser.do)
我尝试使用Google Chrome上的POSTMAN应用登录,效果很好。但是,它在PHP / Python中都不起作用。我无法通过PHP / Python登录
这是PHP代码:
// Step 1: POST the credentials with an empty accessCode. The script expects the
// response to contain the login form with a hidden input #login_accessCode.
$url = "http://www.mca.gov.in/mcafoportal/loginValidateUser.do";
$post_fields = array();
$post_fields['userNamedenc'] = 'hGJfsdnk`1t';
$post_fields['passwordenc'] = '675894242fa9c66939d9fcf4d5c39d1830f4ddb9';
$post_fields['accessCode'] = ""; // fixed: this statement was missing its semicolon (parse error)

$str = call_post_mca($url, $post_fields);
if ($str === false || $str === '') {
    // curl_exec() returns false on failure; there is nothing to parse in that case.
    throw new RuntimeException('No response received from ' . $url);
}

// NOTE(review): the original removed every literal space (" "), which glues
// "<input id=..." into "<inputid=..." and makes the XPath below match nothing.
// Removing the &nbsp; entity is assumed to be what was intended - confirm.
$str = str_replace("&nbsp;", "", $str);

// Real-world HTML is rarely valid; collect libxml warnings instead of printing them.
$previous = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($str);
libxml_clear_errors();
libxml_use_internal_errors($previous);

$xpath = new DOMXPath($dom);
$input_id = '//input[@id="login_accessCode"]/@value';
$input_val = $xpath->query($input_id)->item(0);
if ($input_val === null) {
    // item(0) is null when the page has no such input (e.g. an error page).
    throw new RuntimeException('accessCode input not found in the response');
}
$input_val1 = $input_val->nodeValue;

// Step 2: prepare the same fields again, this time with the accessCode just read.
// This excerpt only prepares the fields; it does not send the second request.
$url = "http://www.mca.gov.in/mcafoportal/loginValidateUser.do";
$post_fields['userNamedenc'] = 'hGJfsdnk`1t';
$post_fields['passwordenc'] = '675894242fa9c66939d9fcf4d5c39d1830f4ddb9';
$post_fields['accessCode'] = $input_val1; //New Accesscode
/**
 * Sends $params as an application/x-www-form-urlencoded POST to $url and
 * returns the response body.
 *
 * Redirects are followed, and cookies are read from and written to
 * DOC_ROOT . "/cookie.txt" (DOC_ROOT is a constant defined outside this block).
 *
 * @param string $url    target URL; also sent as the Referer header
 * @param array  $params form fields, name => value
 * @return string|false  response body, or false when curl_exec() fails
 */
function call_post_mca($url, $params)
{
    $user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36";

    // http_build_query() percent-encodes every key and value. The previous
    // hand-written "$key=$value&" loop sent characters such as the backtick
    // in the user name unencoded.
    $postData = http_build_query($params);

    $cookie = DOC_ROOT . "/cookie.txt";

    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER         => false,
        CURLOPT_POST           => 1,
        CURLOPT_POSTFIELDS     => $postData,
        CURLOPT_USERAGENT      => $user_agent,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_REFERER        => $url,
        // Same file for reading and writing so the session survives across calls.
        CURLOPT_COOKIEJAR      => $cookie,
        CURLOPT_COOKIEFILE     => $cookie,
    ));
    $output = curl_exec($ch);
    curl_close($ch);

    return $output;
}
知道缺少什么吗?
答案 0 :(得分:3)
网站会进行重定向,因此您需要添加
CURLOPT_FOLLOWLOCATION => 1
到您的选项数组。如果对cURL有疑问,请尝试
$status = curl_getinfo($curl);
echo json_encode($status, JSON_PRETTY_PRINT);
给予:
{
"url": "http:\/\/www.mca.gov.in\/mcafoportal\/loginValidateUser.do?userNamedenc=hGJfsdnk%601t&passwordenc=675894242fa9c66939d9fcf4d5c39d1830f4ddb9&accessCode=-825374456",
"content_type": "text\/plain",
"http_code": 302,
"header_size": 1560,
"request_size": 245,
"filetime": -1,
"ssl_verify_result": 0,
"redirect_count": 0,
"total_time": 1.298891,
"namelookup_time": 0.526375,
"connect_time": 0.999786,
"pretransfer_time": 0.999844,
"size_upload": 0,
"size_download": 0,
"speed_download": 0,
"speed_upload": 0,
"download_content_length": 0,
"upload_content_length": -1,
"starttransfer_time": 1.298875,
"redirect_time": 0,
"redirect_url": "http:\/\/www.mca.gov.in\/mcafoportal\/login.do",
"primary_ip": "115.114.108.120",
"certinfo": [],
"primary_port": 80,
"local_ip": "192.168.1.54",
"local_port": 62524
}
如您所见,您获得了302
重定向状态,但redirect_count
为0
。添加选项后,我得到:
{
"url": "http:\/\/www.mca.gov.in\/mcafoportal\/login.do",
"content_type": "text\/html;charset=ISO-8859-1",
"http_code": 200,
"header_size": 3131,
"request_size": 376,
"filetime": -1,
"ssl_verify_result": 0,
"redirect_count": 1,
"total_time": 2.383609,
"namelookup_time": 1.7e-5,
"connect_time": 1.7e-5,
"pretransfer_time": 4.4e-5,
"size_upload": 0,
"size_download": 42380,
"speed_download": 17779,
"speed_upload": 0,
"download_content_length": 42380,
"upload_content_length": -1,
"starttransfer_time": 0.30734,
"redirect_time": 0.915858,
"redirect_url": "",
"primary_ip": "14.140.191.120",
"certinfo": [],
"primary_port": 80,
"local_ip": "192.168.1.54",
"local_port": 62642
}
编辑:先对请求参数进行 URL 编码,然后按照重定向
// ...and follow the redirect (continuation of the sentence above).
// Build the body from an array: http_build_query() encodes each key and value
// on its own (the backtick becomes %60) and keeps "=" and "&" as separators.
// urlencode() on the complete "a=b&c=d" string would escape the separators as
// well, and the already-encoded %60 would be encoded a second time.
$str = http_build_query(array(
    'userNamedenc' => 'hGJfsdnk`1t',
    'passwordenc'  => '675894242fa9c66939d9fcf4d5c39d1830f4ddb9',
    'accessCode'   => '-825374456',
));
// $curl is assumed to be the handle created earlier with curl_init().
curl_setopt_array(
    $curl,
    array(
        CURLOPT_URL            => "http://www.mca.gov.in/mcafoportal/loginValidateUser.do", // <- removed parameters here
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_ENCODING       => "",
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_MAXREDIRS      => 10,
        CURLOPT_TIMEOUT        => 30,
        CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1,
        // CURLOPT_POST instead of CURLOPT_CUSTOMREQUEST => "POST": a custom
        // request string is reused for the redirected request as well, whereas
        // a regular POST is turned into a GET after a 302, like in a browser.
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $str, // <- added this here
        CURLOPT_HTTPHEADER     => array(
            "cache-control: no-cache",
        ),
    )
);
答案 1 :(得分:3)
老实说,这是我很长时间以来见过的最奇怪的网站之一。首先要弄清楚它是如何工作的,所以我决定使用 Chrome,看看用错误的数据登录时会发生什么
观察:
userNamedenc
和respectively
因此解决问题的方法是遵循以下步骤
login.do
loginValidateUser.do
表单发送以下参数
现在,有趣的部分是 POST 数据中的下面两项
displayCaptcha:true
userEnteredCaptcha:strrty
如果我们将displayCaptcha
覆盖为false,则不再需要验证码。这是一个不错的绕过办法
displayCaptcha: false
接下来是在PHP中把上述所有步骤编写成代码,但该网站似乎很奇怪,许多尝试都失败了。最后我意识到,我们需要让请求更接近浏览器的登录行为,并且我觉得需要在各次请求之间加入延迟
<?php
// Logs in to the MCA portal with the fluent CURL helper from curl.php:
//   1. GET login.do and collect the form's input fields,
//   2. request a few static resources, pausing between requests,
//   3. POST the saved fields to loginValidateUser.do,
//   4. GET login.do again and look for the user's name in the page.
require_once("curl.php");
$curl = new CURL();

// Header lines in the "Name: value" form that CURLOPT_HTTPHEADER expects.
// An associative name => value array would be sent without the header names.
$default_headers = array(
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding: deflate",
    "Accept-Language: en-US,en;q=0.8",
    "Cache-Control: no-cache",
    "Connection: keep-alive",
    "DNT: 1",
    "Pragma: no-cache",
    "Referer: http://www.mca.gov.in/mcafoportal/login.do",
    "Upgrade-Insecure-Requests: 1",
);

// Get the login page
$curl
    ->followlocation(0)
    ->cookieejar("")
    ->verbose(1)
    ->get("http://www.mca.gov.in/mcafoportal/login.do")
    ->header($default_headers)
    ->useragent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")
    ->execute();

// Save the postfields and access code as we would need them later for the POST field
$post = $curl->loadInputFieldsFromResponse()
    ->updatePostParameter(array(
        "displayCaptcha" => "false", // overriding this flag is what skips the captcha
        "userNamedenc"   => "hGJfsdnk`1t",
        "passwordenc"    => "675894242fa9c66939d9fcf4d5c39d1830f4ddb9",
        "userName"       => "",
        "Cert"           => "",
    ))
    ->referrer("http://www.mca.gov.in/mcafoportal/login.do")
    ->removePostParameters(
        array("dscBasedLoginFlag", "maxresults", "fe", "query", "SelectCert", "newUserRegistration")
    );

// execute() clears the helper's post fields, so keep a copy for the login POST.
$postfields = $curl->getPostFields();
var_dump($postfields);

// Access some dummy URLs to make it look like browser
$curl
    ->get("http://www.mca.gov.in/mcafoportal/js/global.js")->header($default_headers)->execute()->sleep(2)
    ->get("http://www.mca.gov.in/mcafoportal/js/loginValidations.js")->header($default_headers)->execute()->sleep(2)
    ->get("http://www.mca.gov.in/mcafoportal/css/layout.css")->header($default_headers)->execute()->sleep(2)
    ->get("http://www.mca.gov.in/mcafoportal/img/bullet.png")->header($default_headers)->execute()->sleep(2)
    ->get("http://www.mca.gov.in/mcafoportal/getCapchaImage.do")->header($default_headers)->execute()->sleep(2);

// POST to the login form the postfields saved earlier
$curl
    ->sleep(20)
    ->header($default_headers)
    ->postfield($postfields)
    ->referrer("http://www.mca.gov.in/mcafoportal/login.do")
    ->post("http://www.mca.gov.in/mcafoportal/loginValidateUser.do")
    ->execute(false)
    ->sleep(3)
    ->get("http://www.mca.gov.in/mcafoportal/login.do")
    ->header($default_headers)
    ->execute(true);

// Get the response from last GET of login.do
$curl->getResponseText($output);

// Check if user name is present in the output or not.
// stripos() returns 0 for a match at the very start and false for no match,
// so the result has to be compared against false rather than tested with "> 0".
if (is_string($output) && stripos($output, "Kiran") !== false) {
    echo "Hurray!!!! Login succeeded";
} else {
    echo "Login failed please retry after sometime";
}
运行代码后,它运行了几次,几次没有运行。我的观察
我创建并用于链接方法的可重用curl.php
位于
<?php
/**
 * Minimal fluent wrapper around a single cURL handle.
 *
 * Configuration methods set one cURL option each and return $this, so calls
 * can be chained. execute() runs the request, stores the body and the
 * curl_getinfo() data, and clears the collected POST fields.
 */
class CURL
{
    /** @var resource|\CurlHandle the wrapped cURL handle */
    protected $ch;

    /** @var array POST fields (name => value) used by the next post() call */
    protected $postfields = array();

    /** @var string|bool|null return value of the last curl_exec() (false on failure) */
    protected $lastCurlRes;

    /** @var array|bool|null curl_getinfo() data of the last request */
    protected $lastCurlResInfo;

    /**
     * Creates the handle with GET, follow-redirects and return-transfer enabled.
     */
    public function __construct()
    {
        $this->ch = curl_init();
        $this->get()->followlocation()->retuntransfer(); //->connectiontimeout(20)->timeout(10);
    }

    /** @return resource|\CurlHandle the underlying cURL handle */
    public function getCURL()
    {
        return $this->ch;
    }

    /** Closes the underlying handle. */
    public function close()
    {
        curl_close($this->ch);
        return $this;
    }

    /* ---------------------------------------------------------------------
     * cURL options
     * ------------------------------------------------------------------ */

    public function url($url)
    {
        curl_setopt($this->ch, CURLOPT_URL, $url);
        return $this;
    }

    public function verbose($value = true)
    {
        curl_setopt($this->ch, CURLOPT_VERBOSE, $value);
        return $this;
    }

    public function cookieejar($cjar)
    {
        curl_setopt($this->ch, CURLOPT_COOKIEJAR, $cjar);
        return $this;
    }

    public function cookieefile($cfile)
    {
        curl_setopt($this->ch, CURLOPT_COOKIEFILE, $cfile);
        return $this;
    }

    public function followlocation($follow = 1)
    {
        curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, $follow);
        return $this;
    }

    public function retuntransfer($transfer = 1)
    {
        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, $transfer);
        return $this;
    }

    public function connectiontimeout($connectiontimeout)
    {
        curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT, $connectiontimeout);
        return $this;
    }

    public function timeout($timeout)
    {
        curl_setopt($this->ch, CURLOPT_TIMEOUT, $timeout);
        return $this;
    }

    public function useragent($useragent)
    {
        curl_setopt($this->ch, CURLOPT_USERAGENT, $useragent);
        return $this;
    }

    public function referrer($referrer)
    {
        curl_setopt($this->ch, CURLOPT_REFERER, $referrer);
        return $this;
    }

    public function SSLVerifyPeer($verify = false)
    {
        curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, $verify);
        return $this;
    }

    /**
     * Sets the request headers.
     *
     * @param array $headers either a list of "Name: value" strings or a
     *                       name => value map; map entries are converted to
     *                       "Name: value" lines, because CURLOPT_HTTPHEADER
     *                       uses only the array values
     */
    public function header($headers)
    {
        $lines = array();
        foreach ($headers as $name => $value) {
            $lines[] = is_int($name) ? $value : $name . ': ' . $value;
        }
        curl_setopt($this->ch, CURLOPT_HTTPHEADER, $lines);
        return $this;
    }

    /* ---------------------------------------------------------------------
     * Request method and execution
     * ------------------------------------------------------------------ */

    /**
     * Switches the handle to GET, optionally setting the URL.
     */
    public function get($url = '')
    {
        if ($url !== '') {
            $this->url($url);
        }
        curl_setopt($this->ch, CURLOPT_POST, 0);
        curl_setopt($this->ch, CURLOPT_HTTPGET, true);
        return $this;
    }

    /**
     * Switches the handle to POST with the currently collected fields as a
     * URL-encoded body, optionally setting the URL.
     */
    public function post($url = '')
    {
        if ($url !== '') {
            $this->url($url);
        }
        curl_setopt($this->ch, CURLOPT_POST, true);
        curl_setopt($this->ch, CURLOPT_POSTFIELDS, http_build_query($this->postfields));
        return $this;
    }

    /** Pauses for the given number of seconds (chainable sleep()). */
    public function sleep($seconds)
    {
        sleep($seconds);
        return $this;
    }

    /**
     * Runs the request and stores body and transfer info.
     *
     * Side effects: clears the collected POST fields; when $output is truthy
     * the body is echoed and written to "out.html" in the working directory.
     *
     * @param bool $output echo and dump the response body
     */
    public function execute($output = false)
    {
        $this->lastCurlRes = curl_exec($this->ch);
        if ($output) {
            echo "Response is \n " . $this->lastCurlRes;
            file_put_contents("out.html", $this->lastCurlRes);
        }
        $this->lastCurlResInfo = curl_getinfo($this->ch);
        $this->postfields = array();
        return $this;
    }

    /* ---------------------------------------------------------------------
     * Response access
     * ------------------------------------------------------------------ */

    /**
     * @param mixed $text receives the body of the last response (by reference)
     */
    public function getResponseText(&$text)
    {
        $text = $this->lastCurlRes;
        return $this;
    }

    /**
     * @param DOMDocument $doc receives the last response parsed as HTML (by
     *                         reference); an empty document when there is no body
     */
    public function getResponseDoc(&$doc)
    {
        $doc = $this->parseHtml($this->lastCurlRes);
        return $this;
    }

    /** Placeholder: not implemented, returns null. */
    public function getResponseXml()
    {
        //SimpleXMLElement('<INPUT/>')->asXML();
    }

    /* ---------------------------------------------------------------------
     * POST field collection
     * ------------------------------------------------------------------ */

    /** @return array the currently collected POST fields */
    public function getPostFields()
    {
        return $this->postfields;
    }

    /** Starts an empty set of POST fields. */
    public function newpost()
    {
        $this->postfields = array();
        return $this;
    }

    public function addPostFields($key, $value)
    {
        $this->postfields[$key] = $value;
        return $this;
    }

    /**
     * Replaces all POST fields; non-array arguments are ignored.
     */
    public function postfield($fields)
    {
        if (is_array($fields)) {
            $this->postfields = $fields;
        }
        return $this;
    }

    /**
     * Replaces the POST fields with the name/value pairs of every named
     * <input> element of an HTML document.
     *
     * @param string|DOMDocument $response HTML (or an already parsed document)
     *                                     to read; when empty, the body of the
     *                                     last executed request is used.
     *                                     Previously this argument was
     *                                     overwritten and therefore ignored.
     */
    public function loadInputFieldsFromResponse($response = '')
    {
        if ($response instanceof DOMDocument) {
            $doc = $response;
        } elseif (is_string($response) && $response !== '') {
            $doc = $this->parseHtml($response);
        } else {
            $doc = $this->parseHtml($this->lastCurlRes);
        }

        $this->postfields = array();
        foreach ($doc->getElementsByTagName('input') as $elem) {
            $name = $elem->getAttribute('name');
            // Compare against '' so that an input named "0" is not skipped.
            if ($name !== '') {
                $this->postfields[$name] = $elem->getAttribute("value");
            }
        }
        return $this;
    }

    /**
     * Removes the given field(s) from the POST fields.
     *
     * @param string|array $keys one field name or a list of names
     */
    public function removePostParameters($keys)
    {
        foreach ((array) $keys as $key) {
            unset($this->postfields[$key]);
        }
        return $this;
    }

    /**
     * Keeps only the listed fields and drops every other POST field.
     *
     * @param array $keys names of the fields to keep
     */
    public function keepPostParameters($keys)
    {
        $this->postfields = array_intersect_key($this->postfields, array_flip($keys));
        return $this;
    }

    /**
     * Adds or overwrites POST fields.
     *
     * @param array|string $postarray name => value map (a null value removes
     *                                the field) or a "a=1&b=2" query string
     * @param bool         $encoded   for the string form: names and values are
     *                                URL-encoded and get decoded
     */
    public function updatePostParameter($postarray, $encoded = false)
    {
        if (is_array($postarray)) {
            foreach ($postarray as $key => $value) {
                if ($value === null) {
                    unset($this->postfields[$key]);
                } else {
                    $this->postfields[$key] = $value;
                }
            }
            return $this;
        }

        if (!is_string($postarray)) {
            return $this;
        }

        foreach (explode('&', $postarray) as $pair) {
            if ($pair === '') {
                // Leading, trailing or doubled "&" - nothing to store.
                continue;
            }
            $index = strpos($pair, '=');
            if ($index === false || $index === 0) {
                // No "=" (or no name before it): store the piece as a name
                // with an empty value.
                $this->postfields[$pair] = "";
                continue;
            }
            $key = substr($pair, 0, $index);
            $value = (string) substr($pair, $index + 1);
            if ($encoded) {
                $key = urldecode($key);
                $value = urldecode($value);
            }
            $this->postfields[$key] = $value;
        }
        return $this;
    }

    /**
     * Parses an HTML string leniently; parser warnings are discarded.
     *
     * @param mixed $html HTML source; anything but a non-empty string yields
     *                    an empty document (e.g. after a failed request)
     * @return DOMDocument
     */
    protected function parseHtml($html)
    {
        $doc = new DOMDocument();
        if (!is_string($html) || $html === '') {
            return $doc;
        }
        if (PHP_VERSION_ID < 80000) {
            // Deprecated since PHP 8.0; still called on older versions as before.
            libxml_disable_entity_loader(true);
        }
        $previous = libxml_use_internal_errors(true);
        $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        return $doc;
    }
}
?>
答案 2 :(得分:1)
@yvesleborg和@tarun-lalwani给出了正确的提示。您需要处理cookie和重定向。但是,它并不总是对我有效。我猜站点运营方要求两个请求之间有一定的时间间隔。
我稍微改写了你的代码来玩它。 mycurl.php:
/**
 * Creates a cURL handle preconfigured for the portal's login endpoint:
 * fixed user agent, target URL, redirects followed, body returned as string.
 *
 * @return resource|\CurlHandle
 */
function my_curl_init() {
    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        CURLOPT_URL            => "http://www.mca.gov.in/mcafoportal/loginValidateUser.do",
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_RETURNTRANSFER => true,
    ));
    return $handle;
}
/*
 * first call in order to get accessCode and sessionCookie
 */
$ch = my_curl_init();
curl_setopt($ch, CURLOPT_COOKIEJAR, __DIR__ . "/cookie.txt"); // else cookielist is empty
$output = curl_exec($ch);
file_put_contents(__DIR__ . '/loginValidateUser.html', $output);
// save cookie info (one Netscape-format line per cookie)
$cookielist = curl_getinfo($ch, CURLINFO_COOKIELIST);
//print_r($cookielist);
curl_close($ch);

// parse accessCode from output ($output is false when the request failed)
$re = '/\<input.*name="accessCode".*value="([-0-9]+)"/';
preg_match_all($re, (string) $output, $matches, PREG_SET_ORDER, 0);

// A Netscape cookie line has seven tab-separated fields:
// domain, subdomain flag, path, secure flag, expiry, name, value.
$cookiedata = empty($cookielist) ? array() : explode("\t", $cookielist[0]);

if (!$matches) {
    echo "accessCode not found in the first response" . PHP_EOL;
} elseif (count($cookiedata) < 7) {
    // Without this check a missing cookie produced the bogus cookie string "=".
    echo "no session cookie received" . PHP_EOL;
} else {
    $accessCode = $matches[0][1];
    // debug
    echo "accessCode: $accessCode" . PHP_EOL;

    /*
     * second call in order to login
     */
    $post_fields = array(
        'userNamedenc' => 'hGJfsdnk`1t',
        'passwordenc' => '675894242fa9c66939d9fcf4d5c39d1830f4ddb9',
        'accessCode' => $accessCode
    );
    $session_cookie = $cookiedata[5] . '=' . $cookiedata[6];
    // debug
    echo "sessionCookie: $session_cookie" . PHP_EOL;
    file_put_contents(__DIR__ . '/cookie2.txt', $session_cookie);

    /*
     * !!! pause !!!
     */
    sleep(20);

    // debug
    echo "curl -v -L -X POST -b '$session_cookie;' --data 'userNamedenc=hGJfsdnk`1t&passwordenc=675894242fa9c66939d9fcf4d5c39d1830f4ddb9&accessCode=$accessCode' http://www.mca.gov.in/mcafoportal/loginValidateUser.do > loginValidateUser2.html";

    $ch = my_curl_init();
    curl_setopt($ch, CURLOPT_POST, 1);
    // An array here would be sent as multipart/form-data. A query string is
    // sent as application/x-www-form-urlencoded, like the curl --data command
    // printed above.
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_fields));
    curl_setopt($ch, CURLOPT_COOKIE, $session_cookie);
    $output = curl_exec($ch);
    file_put_contents(__DIR__ . '/loginValidateUser2.html', $output);
    curl_close($ch);
}
该脚本向网站发出两个请求。第一个的输出用于读取accessCode
并存储会话cookie。然后在稍微休息之后,使用accessCode和会话信息以及登录凭证发布第二个。
我使用PHP5.6从终端(php -f mycurl.php
)测试了它。该脚本调试所有必要的信息,输出您可以在终端中使用的curl命令,并将HTML和cookie信息记录到脚本等同一文件夹中的某些文件。
过于频繁地运行脚本会导致登录失败(登录无效)。因此,请在两次尝试之间等待几分钟,或者更换您的IP ;)
希望它有所帮助。
答案 3 :(得分:0)
问题的复制
我在Postman中做了同样的事情,就像截图一样,但无法登录:
我能看到的唯一区别是你的请求有cookie,我怀疑这是你能够在没有所有其他输入字段的情况下登录的原因。似乎有很多输入字段:
使用邮递员
所以,我使用postman拦截来捕获登录期间使用的所有字段,包括验证码和访问代码,我能够登录:
更新1
我发现,一旦您解决了验证码以便登录,在您退出后,您可以在表格数据中没有displayCaptcha
和userEnteredCaptcha
的情况下重新登录,已提供您使用的Cookie与您用于成功登录的Cookie相同。您只需从登录页面获取有效的accessCode
。
答案 4 :(得分:0)
it doesnt work either in PHP/Python
那是(正如其他人已经指出的那样),因为您使用的是浏览器现有的cookie会话,
已经解决了验证码。清除您的浏览器cookie,获得一个新的cookie会话,并且不要解决CAPTCHA,Postman也将无法登录。
Any idea what is missing ?
有几件事,其中包括几个帖子后登录参数(browserFlag
,loginType
,__checkbox_dscBasedLoginFlag
等等,
这里你的编码循环也是错误的$str = $str . "$key=$value" . "&";
,
只有当键和值都只包含[a-zA-Z0-9]字符时,它才能正常工作
并且由于您的userNamedenc包含反引号(`,即重音符)字符,因此您的编码循环是不够的。
// Percent-encode every key and every value, appending "key=value&" to $str
// ($str is expected to be initialised before this fragment).
foreach($params as $key=>$value){
$str = $str . urlencode($key)."=".urlencode($value) . "&";
}
// Drop the trailing "&" left behind by the last iteration.
$str=substr($str,0,-1);
,但是 这正是为什么我们有http_build_query函数,整个循环和以下修剪可以用这一行代替:
// http_build_query() encodes the keys and values and joins the pairs with "&" in one call.
$str=http_build_query($params);
,似乎你在没有预先存在的cookie会话的情况下尝试登录, 那不行。当您对登录页面发出GET请求时,您会获得一个cookie和一个唯一的验证码, 验证码答案与您的cookie会话相关联,需要在您尝试登录之前解决, 你也没有提供代码来处理验证码。另外,在解析“userName”输入元素时,它将默认为“输入用户名”,用javascript清空并替换为userNamedenc,你必须在PHP中复制它, 另外,它将有一个名为“dscBasedLoginFlag”的输入元素,使用javascript删除,你还必须在php中执行此部分, 它还有一个名为“Cert”的输入元素,它有一个默认值,但这个值用javascript清除,在php中也一样, 和一个名为“newUserRegistration”的输入元素,使用javascript删除,执行此操作,
这是你应该做的:向登录页面发出GET请求,保存cookie会话并确保为所有进一步的请求提供它,并解析所有登录表单的元素并将它们添加到您的登录请求中(但是小心,有2x表单输入,1个属于搜索栏,只解析登录表单的子项,不要混合2 ),并记得清除/删除特殊输入标签模仿javascript,如上所述, 然后向验证码网址发出GET请求,确保提供会话cookie,解决验证码, 然后使用验证码答案,userNamedenc和passwordenc以及所有其他元素进行最终登录请求 从登录页面解析出...应该可以工作。现在,以编程方式解决验证码, captha看起来不太硬,破解它可能是自动化的,但直到有人真的这样做, 你可以使用Deathbycaptcha为你做,但请注意,它不是免费服务。
这是一个经过全面测试的实用示例实现,使用我的hhb_curl库(来自https://github.com/divinity76/hhb_.inc.php/blob/master/hhb_.inc.php)和Deathbycaptcha api:
<?php // Logs in to the MCA portal: fetch the login form, fill it in, solve the captcha, submit.
declare(strict_types = 1);
header ( "content-type: text/plain;charset=utf8" );
require_once ('hhb_.inc.php');
const DEATHBYCATPCHA_USERNAME = '?'; // Death By Captcha credentials, read by solve_captcha()
const DEATHBYCAPTCHA_PASSWORD = '?';
$hc = new hhb_curl ( '', true );
$hc->setopt(CURLOPT_TIMEOUT,20);// im on a really slow net atm :(
$html = $hc->exec ( 'http://www.mca.gov.in/mcafoportal/login.do' )->getResponseBody (); // cookie session etc
$domd = new DOMDocument (); @$domd->loadHTML ( $html ); // fixed: loadHTML() is an instance method; calling it statically is an Error on PHP 8
$inputs = getDOMDocumentFormInputs ( $domd, true ) ['login'];
$params = [ ];
foreach ( $inputs as $tmp ) {
    $params [$tmp->getAttribute ( "name" )] = $tmp->getAttribute ( "value" );
}
assert ( isset ( $params ['userNamedenc'] ), 'username input not found??' );
assert ( isset ( $params ['passwordenc'] ), 'passwordenc input not found??' );
$params ['userName'] = ''; // defaults to "Enter Username", cleared with javascript
unset ( $params ['dscBasedLoginFlag'] ); // removed with javascript
$params ['Cert'] = ''; // cleared to emptystring with javascript
unset ( $params ['newUserRegistration'] ); // removed with javascript
unset ( $params ['SelectCert'] ); // removed with javascript
$params ['userNamedenc'] = 'hGJfsdnk`1t';
$params ['passwordenc'] = '675894242fa9c66939d9fcf4d5c39d1830f4ddb9';
echo 'parsed login parameters: ';
var_dump ( $params );
$captchaRaw = $hc->exec ( 'http://www.mca.gov.in/mcafoportal/getCapchaImage.do' )->getResponseBody ();
$params ['userEnteredCaptcha'] = solve_captcha2 ( $captchaRaw );
// NOTE: the first 28 lines above keep their original line numbers on purpose,
// because the accompanying text refers to lines 5, 6 and 28 of this script.

// now actually logging in.
$html = $hc->setopt_array ( array (
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => http_build_query ( $params )
) )->exec ( 'http://www.mca.gov.in/mcafoportal/loginValidateUser.do' )->getResponseBody ();
var_dump ( $hc->getStdErr (), $hc->getStdOut () ); // printing debug data

// Parse the answer and treat every <ul class="errorMessage"> as a login error.
$domd = new DOMDocument ();
@$domd->loadHTML ( $html );
$xp = new DOMXPath ( $domd );
$loginErrors = $xp->query ( '//ul[@class="errorMessage"]' );
if ($loginErrors->length > 0) {
    echo 'encountered following error(s) logging in: ';
    foreach ( $loginErrors as $err ) {
        echo $err->textContent, PHP_EOL;
    }
    die ();
}
echo "logged in successfully!";
/**
 * Solves the captcha manually: the image is saved as captcha.png in the
 * current working directory and the function then waits until a human writes
 * the answer into captcha.txt next to it (echo ANSWER>captcha.txt).
 *
 * The wait has no timeout; the file is polled once per second.
 *
 * @param string $raw_image
 *        	raw image bytes
 * @return string answer (surrounding whitespace trimmed)
 * @throws \RuntimeException when the captcha image cannot be written
 */
function solve_captcha2(string $raw_image): string {
    $imagepath = getcwd () . DIRECTORY_SEPARATOR . 'captcha.png';
    $answerpath = getcwd () . DIRECTORY_SEPARATOR . 'captcha.txt';

    // Remove leftovers of an earlier run so that a stale answer is never used.
    foreach ( array ( $imagepath, $answerpath ) as $stale ) {
        if (is_file ( $stale )) {
            unlink ( $stale );
        }
    }

    // Without the image nobody can solve the captcha; fail instead of waiting forever.
    if (file_put_contents ( $imagepath, $raw_image ) === false) {
        throw new \RuntimeException ( 'unable to write captcha image to ' . $imagepath );
    }
    echo 'the captcha is saved in ' . $imagepath . PHP_EOL;
    echo ' waiting for you to solve it by doing: echo ANSWER>' . $answerpath, PHP_EOL;

    while ( true ) {
        sleep ( 1 );
        clearstatcache ( true, $answerpath );
        if (! is_file ( $answerpath )) {
            continue;
        }
        $answer = trim ( ( string ) file_get_contents ( $answerpath ) );
        if ($answer === '') {
            // The shell creates the file before it writes the answer; retry.
            continue;
        }
        echo 'solved: ' . $answer, PHP_EOL;
        return $answer;
    }
}
/**
 * Solves the captcha through the Death By Captcha HTTP API.
 *
 * The image is written to a temporary file and posted, together with the
 * DEATHBYCATPCHA_USERNAME / DEATHBYCAPTCHA_PASSWORD constants, to
 * http://api.dbcapi.me/api/captcha. The effective URL after that request is
 * then polled every 10 seconds until the decoded JSON has a non-empty
 * "captcha" property. The loop has no retry limit or timeout.
 *
 * NOTE(review): depends on the external hhb_curl class; the surrounding text
 * says this solver was never tested against a funded account.
 *
 * @param string $raw_image raw image bytes
 * @return string the captcha text reported by the service
 */
function solve_captcha(string $raw_image): string {
echo 'solving captcha, hang on, with DEATBYCAPTCHA this usually takes between 10 and 20 seconds.';
{
// unfortunately, CURLFile requires a filename, it wont accept a string, so make a file of it
$tmpfileh = tmpfile ();
fwrite ( $tmpfileh, $raw_image ); // TODO: error checking (incomplete write or whatever)
$tmpfile = stream_get_meta_data ( $tmpfileh ) ['uri'];
}
// Upload the captcha; the array body makes cURL send multipart/form-data with the file attached.
$hc = new hhb_curl ( '', true );
$hc->setopt_array ( array (
CURLOPT_URL => 'http://api.dbcapi.me/api/captcha',
CURLOPT_POSTFIELDS => array (
'username' => DEATHBYCATPCHA_USERNAME,
'password' => DEATHBYCAPTCHA_PASSWORD,
'captchafile' => new CURLFile ( $tmpfile, 'image/png', 'captcha.png' )
)
) )->exec ();
fclose ( $tmpfileh ); // when tmpfile() is fclosed(), its also implicitly deleted.
$statusurl = $hc->getinfo ( CURLINFO_EFFECTIVE_URL ); // status url is given in a http 300x redirect, which hhb_curl auto-follows
// NOTE(review): $statusurl is not used below; the polling request relies on the URL still set on $hc - confirm.
while ( true ) {
// wait for captcha to be solved.
sleep ( 10 );
echo '.';
$json = $hc->setopt_array ( array (
CURLOPT_HTTPHEADER => array (
'Accept: application/json'
),
CURLOPT_HTTPGET => true
) )->exec ()->getResponseBody ();
// json_decode() yields null for invalid JSON; empty() below then simply keeps polling.
$parsed = json_decode ( $json, false );
if (! empty ( $parsed->captcha )) {
echo 'captcha solved!: ' . $parsed->captcha, PHP_EOL;
return $parsed->captcha;
}
}
}
/**
 * Collects the submittable elements (<input>, <textarea>, <button>) of every
 * <form> in a document, grouped by form and by element name.
 *
 * An element belongs to a form when it is a descendant of the form, or when
 * the form has an id and the element's "form" attribute equals that id.
 * Disabled elements and elements without a name are skipped.
 *
 * Result layout:
 *  - forms are keyed by their id, else by their name attribute, else by a
 *    running integer index;
 *  - default: $result[$formKey][$n][$inputName][$m] is a DOMElement, where $n
 *    counts forms sharing a key and $m counts elements sharing a name;
 *  - with $getOnlyFirstMatches: $result[$formKey][$inputName] is the first
 *    such DOMElement of the first such form.
 *
 * @param \DOMDocument $domd                document to scan
 * @param bool         $getOnlyFirstMatches collapse duplicates as described above
 * @return array
 */
function getDOMDocumentFormInputs(\DOMDocument $domd, bool $getOnlyFirstMatches = false): array {
    $isDescendantOf = static function (\DOMNode $descendant, \DOMNode $ancestor): bool {
        for ($parent = $descendant->parentNode; $parent !== null; $parent = $parent->parentNode) {
            if ($parent->isSameNode ( $ancestor )) {
                return true;
            }
        }
        return false;
    };

    // DOMNodeLists cannot be merged with array_merge(), so copy them into one
    // array: all inputs first, then textareas, then buttons.
    $candidates = array ();
    foreach ( array ( 'input', 'textarea', 'button' ) as $tagName ) {
        foreach ( $domd->getElementsByTagName ( $tagName ) as $element ) {
            $candidates [] = $element;
        }
    }

    $parsedForms = array ();
    foreach ( $domd->getElementsByTagName ( 'form' ) as $form ) {
        $formId = $form->getAttribute ( "id" );

        $inputs = array ();
        foreach ( $candidates as $input ) {
            if ($input->hasAttribute ( "disabled" )) {
                // disabled elements are not submitted
                continue;
            }
            $name = $input->getAttribute ( "name" );
            if ($name === '') {
                // elements without a name are not submitted by browsers
                continue;
            }
            // Fixed: the old condition skipped foreign elements only when the
            // form had an id, so a form without an id collected every element
            // of the whole document.
            $belongsToForm = $isDescendantOf ( $input, $form )
                || ($formId !== '' && $input->getAttribute ( "form" ) === $formId);
            if (! $belongsToForm) {
                continue;
            }
            $inputs [$name] [] = $input;
        }

        $formKey = $formId !== '' ? $formId : $form->getAttribute ( "name" );
        if ($formKey === '') {
            $parsedForms [] = array (
                $inputs
            );
        } else {
            // Fixed: a second form with the same id/name used to append the
            // undefined variable $tmp instead of its inputs.
            $parsedForms [$formKey] [] = $inputs;
        }
    }

    if ($getOnlyFirstMatches) {
        foreach ( $parsedForms as $formKey => $formsWithSameKey ) {
            $firstForm = $formsWithSameKey [0];
            foreach ( $firstForm as $inputName => $elements ) {
                $firstForm [$inputName] = $elements [0];
            }
            $parsedForms [$formKey] = $firstForm;
        }
    }
    return $parsedForms;
}
示例用法:在终端中,写一下php foo.php | tee test.html
,几秒后会说出类似的内容:
the captcha is saved in /home/captcha.png
waiting for you to solve it by doing: echo ANSWER>/home/captcha.txt
然后查看/home/captcha.png中的验证码,解决它,并在另一个终端写入:echo ANSWER>/home/captcha.txt
,现在脚本将登录,并将登录的html转储到test.html中,您可以在浏览器中打开,确认它实际登录,运行时截图:https://image.prntscr.com/image/_AsB_0J6TLOFSZuvQdjyNg.png
另请注意,我制作了2个验证码求解器函数,1个使用deathbycaptcha api,并且在第5行和第6行提供有效且有信誉的deathbycaptcha帐户之前不会工作,这不是免费的,另外1,solve_captcha2,问你自己解决验证码,并告诉你验证验证码图像的位置(这样你就可以看看它),以及要写入的命令行参数,以便为它提供答案。只需在第28行将solve_captcha
替换为solve_captcha2
,即可手动解决,反之亦然。使用solve_captcha2对脚本进行了全面测试,但是deadbycaptcha解算器未经测试,因为我的deathbycatpcha帐户是空的(如果你想捐款,所以我可以实际测试它,发送7美元到paypal帐户divinity76@gmail.com与链接到这个主题,我将购买最便宜的deathbycaptcha信用包并实际测试它)
答案 5 :(得分:0)
你可以做的最简单的事情,因为你已经在POSTMAN中使用它,就是在POSTMAN中渲染PHP代码。 Here是从POSTMAN获取PHP代码的链接。然后,您可以将POSTMAN示例与您的代码进行比较。
<?php
// Postman-generated request: sends the login parameters in the query string
// with the request method forced to POST, then prints either the cURL error
// or the response body.
$options = array(
    CURLOPT_URL => "http://www.mca.gov.in/mcafoportal/loginValidateUser.do?userNamedenc=hGJfsdnk%601t&passwordenc=675894242fa9c66939d9fcf4d5c39d1830f4ddb9&accessCode=",
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "POST",
    CURLOPT_HTTPHEADER => array(
        "cache-control: no-cache",
        "postman-token: b54abdc0-17be-f38f-9aba-dbf8f007de99"
    ),
);

$handle = curl_init();
curl_setopt_array($handle, $options);

$body = curl_exec($handle);
$failure = curl_error($handle);
curl_close($handle);

// curl_error() returns an empty string when the transfer succeeded.
echo $failure ? "cURL Error #:" . $failure : $body;
立即引起注意的是这个 'hGJfsdnk`1t'。其中的反引号(`)可能会被当作单引号(')的转义字符。这很可能会引发错误,而错误处理会重定向回登录页面。POSTMAN 可能内置了某些处理,把该字符转换成了 'hGJfsdnk%601t'。因此,这在 POSTMAN 中有效,但在您的代码中无效。
以下是此请求的状态:
{
"url": "http:\/\/www.mca.gov.in\/mcafoportal\/login.do",
"content_type": "text\/html;charset=ISO-8859-1",
"http_code": 200,
"header_size": 3020,
"request_size": 821,
"filetime": -1,
"ssl_verify_result": 0,
"redirect_count": 1,
"total_time": 2.920125,
"namelookup_time": 8.2e-5,
"connect_time": 8.7e-5,
"pretransfer_time": 0.000181,
"size_upload": 0,
"size_download": 42381,
"speed_download": 14513,
"speed_upload": 0,
"download_content_length": -1,
"upload_content_length": -1,
"starttransfer_time": 0.320995,
"redirect_time": 2.084554,
"redirect_url": "",
"primary_ip": "115.114.108.120",
"certinfo": [],
"primary_port": 80,
"local_ip": "192.168.1.3",
"local_port": 45086
}
此处显示成功登录。