<?php
$i=1;
while ($i<=5) {
# code...
$url = 'http://www.amazon.in/gp/bestsellers/electronics/ref=zg_bs_nav_0#'.$i;
echo $url;
$html= file_get_contents($url);
$dom = new DOMDocument();
@$dom->loadHTML($html);
$xPath = new DOMXPath($dom);
$classname="zg_title";
$elements = $xPath->query("//*[contains(@class, '$classname')]");
foreach ($elements as $e)
{
$lnk = $e->getAttribute('href');
$e->setAttribute("href", "http://www.amazon.in".$lnk);
$newdoc = new DOMDocument;
$e = $newdoc->importNode($e, true);
$newdoc->appendChild($e);
$html = $newdoc->saveHTML();
echo $html;
}
$i++;
}
?>
我正在尝试浏览亚马逊畅销书页面,其中列出了每个页面中包含20个项目的前100个畅销书项目。在每个循环中,$ i值都会更改并附加到URL。但只有前20个项目被显示5次,我认为这与ajax分页有关,但我无法弄清楚它是什么。
答案 0 :(得分:1)
试试这个:
<?php
$i=1;
while ($i<=5) {
# code...
$url = 'http://www.amazon.in/gp/bestsellers/electronics/ref=zg_bs_electronics_pg_'.$i.'?ie=UTF8&pg='.$i;
echo $url;
$html= file_get_contents($url);
$dom = new DOMDocument();
@$dom->loadHTML($html);
$xPath = new DOMXPath($dom);
$classname="zg_title";
$elements = $xPath->query("//*[contains(@class, '$classname')]");
foreach ($elements as $e)
{
$lnk = $e->getAttribute('href');
$e->setAttribute("href", "http://www.amazon.in".$lnk);
$newdoc = new DOMDocument;
$e = $newdoc->importNode($e, true);
$newdoc->appendChild($e);
$html = $newdoc->saveHTML();
echo $html;
}
$i++;
}
?>
更改$url