通过Curl我已经解析了网站。 但是当我运行foreach循环时,当它进入mysql DB时,它会取出最后一个值。 我理解,因为DB中的插入不在循环中,所以循环将处理db中的最后一个值。 但是当我在循环中输入插入查询时,它会在每一行的左侧输入值。 但我需要一行中的所有值。 示例代码在这里,删除了许多字段。
希望,我能够解释 问候 Rishabh :) 示例链接是:http://www.dvdempire.com/trending-blu-ray.html?page=1 要删除页面的详细信息为:http://www.dvdempire.com/1699319/300-rise-of-an-empire-blu-ray-dvd-ultraviolet-blu-ray.html<?php
set_time_limit(0);
for ($x=1; $x<=2; $x++) {
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';
sleep(1);
$target_url = "http://www.dvdempire.com/trending-blu-ray.html?page=$x" ;
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL,$target_url);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
curl_setopt($ch, CURLOPT_TIMEOUT, 13300000);
$html= curl_exec($ch);
if (!$html) {
echo "<br />cURL error number:" .curl_errno($ch);
echo "<br />cURL error:" . curl_error($ch);
exit;
}
// parse the html into a DOMDocument
$dom = new DOMDocument();
@$dom->loadHTML($html);
$xpath = new DOMXPath( $dom );
$query21 = '//div[@class="container"]//p[@class="title"]//a';
$nodes21 = $xpath->query( $query21 );
foreach( $nodes21 as $node21 )
{
$target_url12=$node21->getAttribute('href');
$target_url1 ="http://www.dvdempire.com$target_url12" ;
curl($target_url1) ;
}
}
function curl($target_url1) {
//Sigle Page Scraping
global $userAgent ;
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';
//$target_url1 = "http://www.dvdempire.com/1695169/3-days-to-kill-blu-ray-dvd-ultraviolet-blu-ray.html";
$ch1 = curl_init();
curl_setopt($ch1, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch1, CURLOPT_URL,$target_url1);
curl_setopt($ch1, CURLOPT_FAILONERROR, true);
curl_setopt($ch1, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch1, CURLOPT_AUTOREFERER, true);
curl_setopt($ch1, CURLOPT_RETURNTRANSFER,true);
curl_setopt($ch1, CURLOPT_TIMEOUT, 13300000);
$html1= curl_exec($ch1);
if (!$html1) {
echo "<br />cURL error number:" .curl_errno($ch1);
echo "<br />cURL error:" . curl_error($ch1);
exit;
}
// parse the html into a DOMDocument
$dom = new DOMDocument();
@$dom->loadHTML($html1);
$xpath = new DOMXPath( $dom );
$query1 = '//div[@id="GeneralInformation"]//div[@class="Section Synopsis"]//p'; // Synopsis
$query2 = '//div[@class="subsection"]//time[@itemprop="duration"]'; // length
$query7 = '//div[@class="Section Cast"]//li[@itemprop="actor"]//span[@itemprop="name"]'; //cast
$query8 = '//div[@class="Section Cast"]//li[@itemprop="producer"]//span[@itemprop="name"]'; // Producer
$query9 = '//div[@class="Section Cast"]//li[@itemprop="director"]//span[@itemprop="name"]'; // Director
$query10 = '//div[@class="Section ProductInfo"]'; // For UPC Code
$nodes1 = $xpath->query( $query1 );
$nodes2 = $xpath->query( $query2 );
$nodes7 = $xpath->query( $query7 );
$nodes8 = $xpath->query( $query8 );
$nodes9 = $xpath->query( $query9 );
$nodes10 = $xpath->query( $query10 );
//Synopsis
echo "--------------- Synopsis --------------- " ;
foreach( $nodes1 as $node1 )
{
echo $a1=$node1->nodeValue;
}
echo "<br>" ;
// length
echo "--------------- Length --------------- " ;
foreach( $nodes2 as $node2 )
{
echo $a2=$node2->nodeValue;
}
echo "<br>" ;
echo "-------------- Cast --------------- " ;
foreach( $nodes7 as $node7 )
{
$a7=$node7->nodeValue;
echo $a7 = $a7.",";
}
echo "<br>" ;
echo "-------------- Producer --------------- " ;
foreach( $nodes8 as $node8 )
{
$a8=$node8->nodeValue;
echo $a8 = $a8.",";
}
echo "<br>" ;
echo "-------------- Director --------------- " ;
foreach( $nodes9 as $node9 )
{
$a9=$node9->nodeValue;
echo $a9 = $a9.",";
}
echo "<hr>" ;
//数据库插入 / * $ con = mysql_connect(“localhost”,“root”,“”); if(!$ con) { 死('无法连接:'。mysql_error()); } mysql_select_db(“dvd”,$ con);
$sql="INSERT INTO data (synopsis,length,rating,cast,producer,director)
VALUES
('$a1','$a2','$a7','$a8','$a9')";
if (!mysql_query($sql,$con))
{
die('Error: ' . mysql_error());
}
mysql_close($con) ;
*/
} ?&GT;
答案 0 :(得分:0)
这有点粗糙,但我会这样做。 虽然我肯定会使用使用PDO的预处理语句,而不是mysql_ *,因为这些函数已被弃用。但这又是另一个话题。
现在......这就是我在这种特殊情况下的表现......
$values=array();
foreach( $nodes1 as $node1 )
{
echo $a1=$node1->nodeValue;
$values[]["Synopsis"] = $a1;
}
echo "<br>" ;
echo "--------------- Length --------------- " ;
foreach( $nodes2 as $node2 )
{
echo $a2=$node2->nodeValue;
$values[]["Length"] = $a2;
}
echo "<br>" ;
echo "-------------- Cast --------------- " ;
foreach( $nodes7 as $node7 )
{
$a7=$node7->nodeValue;
echo $a7 = $a7.",";
$values[]["Cast"] = $a7;
}
echo "<br>" ;
echo "-------------- Producer --------------- " ;
foreach( $nodes8 as $node8 )
{
$a8=$node8->nodeValue;
echo $a8 = $a8.",";
$values[]["Producer"] = $a8;
}
echo "<br>" ;
echo "-------------- Director --------------- " ;
foreach( $nodes9 as $node9 )
{
$a9=$node9->nodeValue;
echo $a9 = $a9.",";
$values[]["Director"] = $a9;
}
echo "<hr>" ;
然后这样做......
$string="";
//Loop through your values and build the query string
foreach($values as $v){
$string.="(".$v['Synopsis'].",".$v['Length'].",".$v['Cast'].",".$v['Producer'].",".$v['Director']."),";
}
//Trim the last comma off the string
rtrim($string,',');
//Append to your query and then execute
$sql="INSERT INTO data (synopsis,length,cast,producer,director) VALUES ".$string;