我正在尝试抓取一个包含AJAX分页html表的网页。我可以很好地获得表格的第一页,但我希望能够获得表格中的每一页。这是ajax分页表的example。
这就是我目前正在抓第一页:
<?php
include_once('simple_html_dom.php');
class JobPosting {
var $Id;
var $RequistionNumber;
var $Title;
var $JobCategory;
var $EmployeeClassification;
var $PartTime;
var $City;
var $Location;
}
function GetJobPostings($filter)
{
$html = file_get_html('https://rn21.ultipro.com/SQU1001/jobboard/listjobs.aspx');
$JobPostings = array();
$headerFlag = true;
foreach($html->find('table.GridTable > tbody > tr') as $job)
{
if($headerFlag == true)
{
$headerFlag = false;
}
else
{
$count = 0;
$jobposting = new JobPosting;
foreach($job->find('td') as $property)
{
switch ($count) {
case 0:
$jobposting->Id = $property->find('a')[0]->href;
$jobposting->RequistionNumber = $property->plaintext;
break;
case 1:
$jobposting->Title = $property->plaintext;
break;
case 2:
$jobposting->JobCategory = $property->plaintext;
break;
case 3:
$jobposting->EmployeeClassification = $property->plaintext;
break;
case 4:
$jobposting->PartTime = $property->plaintext;
break;
case 5:
$jobposting->City = $property->plaintext;
break;
case 6:
$jobposting->Location = $property->plaintext;
break;
}
$count++;
}
if($jobposting->Company == $filter )
{
array_push($JobPostings, $jobposting);
}
}
}
return $JobPostings;
}
?>
如何从本网站获取所有职位发布?
以下是来自fiddler的请求:
答案 0 :(得分:1)
在POST
内容的末尾,搜索__PreparedPXDO
并更改__PageNumber
的值:
__VIEWSTATE=%2FwEPDwUENTM4MWRk8%2BV34Sl40tIYo214geiPudK8q8E%3D&__VIEWSTATEGENERATOR=0906FCBB&__EVENTVALIDATION=%2FwEWDQLRjdiVBQLZmLPIBwLRmtKmBwLJvYapAwKduOGWBgKC9qGSBQL99cOUCAK2xqmfDQKj96rgDQLLvLGTDwLArLTaDQKD%2BaqJAQLUpr%2FLD8lqfch4SOH0EsNlMYgxMFw%2FPlV6&__PXPOSTBACK=1&__PXLBN=&__PXLBV=&__PXDO=&__Keywords=&__RecordsPerPage=&__PreparedPXDO=*501A15E038216814AFF20DE0ED6A92EF0CE34B88DABA9CE31C59DE380A87C4EC0C9572AA25DFFDFB8E15C55C55D711411B386A423D44CE26F051F013F6826174BD97FCDFC3954E186D1F0F623E8C53B4250AEB08B15EF180055517D55245D702DC340152E0ECE6CE6AE715B58F3EA59ADA88C90800A1ED5126FAD95F78078DB20185DBE2A3E3CD0BB550D5CC57A39F33002085E0BC4DAF71FAAE85E859DD39D50C2EDD607B38DD8C9572965A96E105B4896AA698A0663ACE8B3D6AA25271EB9E6225FB183415C27BD742A576A1F32AF4E51012C95495030DDF98AE9007E645CA9D54BA17839DF70BAFDB732014857D3BCFEDAD3F1C8E1BA42A22836C70FC68757FDC843E65FEFAF8D6779A62AF4C791F34DF0EC4794614C6985C18D64694DAD5C5250E7961A0A2B482D32100F42BEB6BA6F35FBB72429D02&__PageNumber= ->HERE<- &__Next=+%3E+
您可以轻松创建循环以废弃所有可用页面,例如:
<?php
$num = 1;
while($num < 5){
$postFields = "__VIEWSTATE=%2FwEPDwUENTM4MWRk8%2BV34Sl40tIYo214geiPudK8q8E%3D&__VIEWSTATEGENERATOR=0906FCBB&__EVENTVALIDATION=%2FwEWDQLRjdiVBQLZmLPIBwLRmtKmBwLJvYapAwKduOGWBgKC9qGSBQL99cOUCAK2xqmfDQKj96rgDQLLvLGTDwLArLTaDQKD%2BaqJAQLUpr%2FLD8lqfch4SOH0EsNlMYgxMFw%2FPlV6&__PXPOSTBACK=1&__PXLBN=&__PXLBV=&__PXDO=&__Keywords=&__RecordsPerPage=&__PreparedPXDO=*501A15E038216814AFF20DE0ED6A92EF0CE34B88DABA9CE31C59DE380A87C4EC0C9572AA25DFFDFB8E15C55C55D711411B386A423D44CE26F051F013F6826174BD97FCDFC3954E186D1F0F623E8C53B4250AEB08B15EF180055517D55245D702DC340152E0ECE6CE6AE715B58F3EA59ADA88C90800A1ED5126FAD95F78078DB20185DBE2A3E3CD0BB550D5CC57A39F33002085E0BC4DAF71FAAE85E859DD39D50C2EDD607B38DD8C9572965A96E105B4896AA698A0663ACE8B3D6AA25271EB9E6225FB183415C27BD742A576A1F32AF4E51012C95495030DDF98AE9007E645CA9D54BA17839DF70BAFDB732014857D3BCFEDAD3F1C8E1BA42A22836C70FC68757FDC843E65FEFAF8D6779A62AF4C791F34DF0EC4794614C6985C18D64694DAD5C5250E7961A0A2B482D32100F42BEB6BA6F35FBB72429D02&__PageNumber=$num&__Next=+%3E+";
$curlHeaders = array(
"Host: rn21.ultipro.com",
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:46.0) Gecko/20100101 Firefox/46.0",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",
"Referer: https://rn21.ultipro.com/SQU1001/jobboard/listjobs.aspx?Page=Browse",
"Cookie: __jbsrcid=*86EE06D72F666815; ASP.NET_SessionId=3vvj3v55gadspcyklbfg2gip; BIGipServerrn21.ultipro.com_http_pool=366258698.20480.0000",
"Connection: keep-alive",
"Content-Type: application/x-www-form-urlencoded",
"Content-Length: " . strlen($postFields)
);
$url = "https://rn21.ultipro.com/SQU1001/jobboard/listjobs.aspx?Page=Browse";
$ch = curl_init();
curl_setopt($ch,CURLOPT_URL, $url);
curl_setopt($ch,CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch,CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch,CURLOPT_HTTPHEADER, $curlHeaders);
curl_setopt($ch,CURLOPT_POSTFIELDS, $postFields);
$result = curl_exec($ch);
curl_close($ch);
echo $result;
$num++;
}
?>
技巧是使用普通浏览器发布第一个帖子(下一页按钮),使用live http headers for firefox或类似内容嗅探发布内容,以及使用卷曲后模仿它 我已经测试了上面的代码,我能够检索网站上的所有职位。