以下是我正在使用的代码(我把它整理成了一个示例)。如您所见,它能正确获取其他所有变量并填入数组,唯独缺少 listPrice2 变量……请帮忙看看。
<?php
// Listing page to download and the browser identity presented to the server.
$url = "http://www.toysrus.com/family/index.jsp?searchSort=TRUE&categoryId=13131514&s=A-StorePrice&ppg=8";
$agent="Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6";

// An empty file name turns on cURL's cookie engine without loading any cookies
// from disk, so cookies set during this request are kept in memory only.
$cookie = "";

$ch = curl_init();
// Certificate verification is left at cURL's secure default; the URL above is
// plain HTTP and redirects are not followed, so TLS is never negotiated here.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_VERBOSE, true);          // transfer diagnostics go to STDERR
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);   // return the body instead of printing it
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_URL, $url);

// $html holds the response body from here on; it is what tru_extract() parses.
$html = curl_exec($ch);
if ($html === false) {
    // Report the transport error while the handle is still alive.
    $error = curl_error($ch);
    unset($ch);
    die("cURL request failed: " . $error);
}
// Dropping the last reference releases the handle on every PHP version.
unset($ch);
/**
 * Pulls one associative array of field values out of every product tile found
 * in a listing page.
 *
 * A tile is whatever the tile pattern below matches: text starting at
 * `<div class="prodloop_cont` and running lazily up to the first later
 * `prodloop_float` that is preceded by a `</div`.
 *
 * $fields is a comma-separated list of field specs. Each spec is either
 *   - `name=regex`  : a custom pattern whose first capture group is the value
 *                     (the spec is split on the FIRST "=", so the regex may
 *                     itself contain "="), or
 *   - `class name`  : shorthand for "text of the element whose class attribute
 *                     is exactly this value", written with double quotes and
 *                     optional trailing whitespace.
 * Because specs are split on ",", a custom regex cannot contain a comma.
 * Patterns are applied case-insensitively, with "." matching newlines, and
 * ungreedy (modifiers "isU"); "@" is the delimiter, so a custom regex must not
 * contain an unescaped "@".
 *
 * NOTE(review): if a class-name field such as listPrice2 always comes back
 * empty, check the real markup against the two constraints above: the class
 * attribute must be exactly that value in double quotes, and the tile pattern
 * must not end the tile before that element. Neither can be confirmed without
 * the page HTML.
 *
 * @param string $html   Raw HTML of the listing page. The script is terminated
 *                       with "NO Html" when this is empty or false.
 * @param string $fields Comma-separated field specs as described above.
 * @return array List of tiles; each tile maps field name => captured text with
 *               tags stripped and whitespace trimmed, or '' when the field was
 *               not found in that tile.
 */
function tru_extract( $html , $fields='prodloop-thumbnail=<a href.*\"(.*)\",swatchProdImg=<.*src.*\"(.*)\",prodtitle,adjusted ourPrice2,listPrice2' )
{
    if (!$html) {
        die("NO Html");
    }

    preg_match_all('@<div class="prodloop_cont.*>(.*)</div.*prodloop_float@isU', $html, $records);

    // Build every field pattern once instead of once per tile.
    $patterns = array();
    foreach (explode(",", $fields) as $field) {
        if (strpos($field, "=") !== false) {
            // Limit 2 keeps any further "=" inside the regex part intact.
            list($fieldname, $regex) = explode("=", $field, 2);
        } else {
            $fieldname = $field;
            // Quote the class name so it is matched literally, not as regex.
            $regex = 'class="' . preg_quote($field, '@') . '[\s]*".*>(.*)</';
        }
        $patterns[$fieldname] = '@' . $regex . '@isU';
    }

    $arr_data = array();
    foreach ($records[0] as $record) {
        $data = array();
        foreach ($patterns as $fieldname => $pattern) {
            // A field missing from this tile yields '' rather than reading an
            // undefined capture group.
            $value = '';
            if (preg_match($pattern, $record, $row) && isset($row[1])) {
                $value = trim(strip_tags($row[1]));
            }
            $data[$fieldname] = $value;
        }
        $arr_data[] = $data;
    }

    return $arr_data;
}
// Parse the downloaded page and show the extracted records for inspection.
$arr_data = tru_extract( $html );
?><pre>
<?php
// The values come from a remote page, so escape them before placing them in HTML.
echo htmlspecialchars(print_r($arr_data, true), ENT_QUOTES, 'UTF-8');
?>
</pre>
<?php
// Stop only after the closing </pre> has been sent, so the markup stays balanced
// and nothing following this point is emitted.
exit;
?>
我尝试过写上完整的 span 类名,但仍然没有结果。print_r 输出的数组包含其他所有变量,唯独没有 listPrice2。