我目前在使用flipkart显示网址的图片和价格时出现问题,我想要的是显示产品图片和网址的价格,并不断抓取其后的所有网址并显示详细信息在一个页面中相同,但我得到错误说达到重定向限制,无法打开流。
这是我的代码:
<?php
ini_set('max_execution_time', 4000);
$to_crawl = "http://www.flipkart.com/apple-iphone-6/p/itme5rf6ewg7trwz?pid=MOBEYGPZAHZQMCKZ&otracker=from-search&srno=t_4&query=apple&al=hplRX0gsd%2BUs3897GU7MA33GdyuXyA9x5heu%2FXnCd8gCFiEqsIXwVoaLq2lx4bRfFLwHQxVDMNU%3D&ref=cfd05202-e814-4bb4-bbe2-422b4ecc6df9";
$c = array();
function getPriceFromFlipkart($url) {
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 10.10; labnol;) ctrlq.org");
curl_setopt($curl, CURLOPT_FAILONERROR, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
$html = curl_exec($curl);
curl_close($curl);
$regex = '/<meta itemprop="price" content="([^"]*)"/';
preg_match($regex, $html, $price);
$regex = '/<h1[^>]*>([^<]*)<\/h1>/';
preg_match($regex, $html, $title);
$regex = '/data-src="([^"]*)"/i';
preg_match($regex, $html, $image);
}
function get_links($url){
global $c;
$input = file_get_contents($url);
$regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
preg_match_all("/$regexp/siU", $input, $matches);
$base_url = parse_url($url, PHP_URL_HOST);
$l = $matches[2];
foreach($l as $link) {
if(strpos($link, "#")) {
$link = substr($link,0, strpos($link, "#"));
}
if(substr($link,0,1) == ".") {
$link = substr($link, 1);
}
if(substr($link,0,7)=="http://") {
$link = $link;
}
else if(substr($link,0,8) =="https://") {
$link = $link;
}
else if(substr($link,0,2) =="//") {
$link = substr($link, 2);
}
else if(substr($link,0,2) =="#") {
$link = $url;
}
else if(substr($link,0,2) =="mailto:") {
$link = "[".$link."]";
}
else {
if(substr($link,0,1) != "/") {
$link = $base_url."/".$link;
}
else {
$link = $base_url.$link;
}
}
if(substr($link, 0, 7)=="http://" && substr($link, 0, 8)!="https://" && substr($link, 0, 1)=="[") {
if(substr($url, 0, 8) == "https://") {
$link = "https://".$link;
}
else {
$link = "http://".$link;
}
}
//echo $link."<br />";
if(!in_array($link,$c)) {
array_push($c,$link);
}
}
}
get_links($to_crawl);
foreach ($c as $page) {
get_links($page);
}
foreach ($c as $page) {
$response = getPriceFromFlipkart($page);
echo json_encode($response);
echo $page."<br />";
}
?>