我有两张表:`product-title` 保存产品名称,`product-list-url` 保存产品列表页面的网址。我想从产品标题表中逐个取出标题,依次访问每个 URL,并在页面上搜索该标题;如果找到,就提取一些数据并保存到数据库中。
我想在每个网址上检查每一个产品,但得不到想要的结果。
这是我的代码:
<?php
// Crawl script: for every product title, visit every listing URL, look for a
// <span> whose text contains the title, and store the sale price found next
// to it in the `prices` table.

// 0 removes the execution time limit; the script fetches remote pages in a loop.
set_time_limit(0);
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "";
$dbname = "crawl";
$conn = mysql_connect($dbhost, $dbuser, $dbpass) or die ("Error connecting to database");
mysql_select_db($dbname, $conn);
// Both result sets are created once, before either loop starts.
$q1 = mysql_query("SELECT * FROM `product-title`");
$q2 = mysql_query("SELECT * FROM `product-list-url`");
// Outer loop: one iteration per product title.
while($res1 = mysql_fetch_assoc($q1)){
$product_title = $res1['title'];
// Inner loop: one iteration per listing URL.
// NOTE: $q2 is never rewound anywhere in this script. Once this loop has
// fetched all of its rows for the first title, mysql_fetch_assoc($q2) keeps
// returning false, so the loop body is skipped for every later title.
while($res2 = mysql_fetch_assoc($q2)){
$url = $res2['url'];
// Download the page and parse it; '@' hides the warnings loadHTML raises.
$html = file_get_contents($url);
$doc = new DOMDocument();
@$doc->loadHTML($html);
$xpath = new DOMXPath($doc);
// boolean(...) turns the node-set into true/false: is there any <span> whose
// text contains the title? The title is interpolated into the expression as-is.
$found = $xpath->evaluate("boolean(//span[contains(text(), '$product_title')])");
if($found == false){
echo "Not Found";
}
else{
// From each matching <span>, step to its following sibling <div> elements and
// select their child <span class="list_sale_price"> elements.
$elements = $xpath->evaluate("//span[contains(text(), '$product_title' )]/following-sibling::div/span[@class='list_sale_price']");
if (!is_null($elements)) {
foreach ($elements as $element) {
// One row is inserted per child node of each price <span>.
$nodes = $element->childNodes;
foreach ($nodes as $node) {
$price = $node->nodeValue;
// Strip every character except digits, '-' and '.'.
$price1 = preg_replace('/[^0-9-.]/','',$price);
// 'y' is the two-digit year, so this produces e.g. 14-05-03.
$date = date('y-m-d');
// Values are interpolated into the SQL text without escaping; the script
// stops with the MySQL error message if the INSERT fails.
mysql_query("INSERT INTO `prices` (`ptitle`, `price`, `date`) VALUES ('$product_title', '$price1', '$date')") or die(mysql_error());
}
}
}
}
}
}
?>
它只为第一个标题插入一条记录。
答案 0 :(得分:0)
首先,不建议使用 `mysql_*` 函数:该扩展已被弃用(并在 PHP 7.0 中移除),应改用 mysqli 或 PDO。回到正题,我认为您需要的是 JOIN。您的 `product-title` 表中每个产品应该都有某种唯一 ID 吧?我假设 `product-list-url` 表中也有与第一张表中产品相匹配的 ID。由于我不知道您的表结构,下面只给出一个示例。
-- Pair each product title with the URL row that carries the same id.
-- Assumes both tables expose a matching `id` column; adapt to the real schema.
SELECT
    t1.title,
    t2.url
FROM `product-title` AS t1
INNER JOIN `product-list-url` AS t2
    ON t2.id = t1.id
答案 1 :(得分:0)
重置结果集指针应该可以让您继续处理第一个产品标题之后的记录。以下代码未经测试,我也没有检查您处理文档的那部分代码。
<?php
/**
 * Price crawler, nested-loop variant.
 *
 * For every row of `product-title`, every URL in `product-list-url` is
 * downloaded and searched for a <span> whose text contains the title. For each
 * match, the text of the sibling <span class="list_sale_price"> is reduced to
 * digits, '-' and '.' and inserted into `prices`.
 *
 * The URL result set is rewound after each title so that every title is
 * checked against every URL.
 *
 * Uses mysqli (the old mysql_* extension no longer exists in PHP 7+) and a
 * prepared statement, so titles containing quotes can neither break nor
 * inject into the INSERT.
 */
set_time_limit(0); // 0 = no execution time limit; the crawl can be slow.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "";
$dbname = "crawl";

$conn = mysqli_connect($dbhost, $dbuser, $dbpass, $dbname) or die ("Error connecting to database");

$q1 = mysqli_query($conn, "SELECT * FROM `product-title`") or die(mysqli_error($conn));
$q2 = mysqli_query($conn, "SELECT * FROM `product-list-url`") or die(mysqli_error($conn));

// Prepared once, executed for every price that is found.
$insert = mysqli_prepare($conn, "INSERT INTO `prices` (`ptitle`, `price`, `date`) VALUES (?, ?, ?)")
    or die(mysqli_error($conn));

// Turns an arbitrary string into a valid XPath 1.0 string literal. XPath has
// no escape character, so a value containing both quote kinds is split on '
// and stitched back together with concat().
$xpathLiteral = function ($value) {
    if (strpos($value, "'") === false) {
        return "'" . $value . "'";
    }
    if (strpos($value, '"') === false) {
        return '"' . $value . '"';
    }
    return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
};

// Collect HTML parse warnings internally instead of silencing them with '@'.
libxml_use_internal_errors(true);

while ($res1 = mysqli_fetch_assoc($q1)) {
    $product_title = $res1['title'];
    $titleSpan = "//span[contains(text(), " . $xpathLiteral($product_title) . ")]";

    while ($res2 = mysqli_fetch_assoc($q2)) {
        $url = $res2['url'];
        $html = file_get_contents($url);

        // A failed or empty download leaves the document empty, which simply
        // yields "Not Found" below (loadHTML must not be given an empty string).
        $doc = new DOMDocument();
        if ($html !== false && $html !== '') {
            $doc->loadHTML($html);
            libxml_clear_errors();
        }
        $xpath = new DOMXPath($doc);

        if (!$xpath->evaluate("boolean($titleSpan)")) {
            echo "Not Found";
            continue;
        }

        $elements = $xpath->evaluate($titleSpan . "/following-sibling::div/span[@class='list_sale_price']");
        // evaluate() returns false (not null) when the expression is invalid.
        if (!($elements instanceof DOMNodeList)) {
            continue;
        }

        foreach ($elements as $element) {
            // One row per child node of the price <span>.
            foreach ($element->childNodes as $node) {
                // Keep digits, '-' and '.' only.
                $price1 = preg_replace('/[^0-9-.]/', '', $node->nodeValue);
                // NOTE(review): 'y' is a two-digit year; switch to 'Y-m-d' if the
                // `date` column is not a DATE type - confirm against the schema.
                $date = date('y-m-d');
                mysqli_stmt_bind_param($insert, 'sss', $product_title, $price1, $date);
                mysqli_stmt_execute($insert) or die(mysqli_stmt_error($insert));
            }
        }
    }

    // Rewind the URL result set; otherwise the inner loop would find it
    // exhausted for every title after the first.
    mysqli_data_seek($q2, 0);
}
?>
不过,更高效的做法可能是先把标题列表读入内存,然后每个页面只抓取并解析一次,再针对每个标题检查一次页面内容。
类似这样:
<?php
/**
 * Price crawler, titles-in-memory variant.
 *
 * All product titles are read into memory first. Each URL in
 * `product-list-url` is then downloaded and parsed once, and the parsed page
 * is searched once per title for a <span> whose text contains that title. For
 * each match, the text of the sibling <span class="list_sale_price"> is
 * reduced to digits, '-' and '.' and inserted into `prices`.
 *
 * Uses mysqli (the old mysql_* extension no longer exists in PHP 7+) and a
 * prepared statement, so titles containing quotes can neither break nor
 * inject into the INSERT.
 */
set_time_limit(0); // 0 = no execution time limit; the crawl can be slow.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "";
$dbname = "crawl";

$conn = mysqli_connect($dbhost, $dbuser, $dbpass, $dbname) or die ("Error connecting to database");

// Turns an arbitrary string into a valid XPath 1.0 string literal. XPath has
// no escape character, so a value containing both quote kinds is split on '
// and stitched back together with concat().
$xpathLiteral = function ($value) {
    if (strpos($value, "'") === false) {
        return "'" . $value . "'";
    }
    if (strpos($value, '"') === false) {
        return '"' . $value . '"';
    }
    return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
};

// Load every title, together with the XPath prefix that locates its <span>,
// so the prefix is built once per title rather than once per page.
$product_titles = array();
$title_spans = array();
$q1 = mysqli_query($conn, "SELECT * FROM `product-title`") or die(mysqli_error($conn));
while ($res1 = mysqli_fetch_assoc($q1)) {
    $product_titles[] = $res1['title'];
    $title_spans[] = "//span[contains(text(), " . $xpathLiteral($res1['title']) . ")]";
}

// Prepared once, executed for every price that is found.
$insert = mysqli_prepare($conn, "INSERT INTO `prices` (`ptitle`, `price`, `date`) VALUES (?, ?, ?)")
    or die(mysqli_error($conn));

// Collect HTML parse warnings internally instead of silencing them with '@'.
libxml_use_internal_errors(true);

$q2 = mysqli_query($conn, "SELECT * FROM `product-list-url`") or die(mysqli_error($conn));
while ($res2 = mysqli_fetch_assoc($q2)) {
    $url = $res2['url'];
    $html = file_get_contents($url);

    // A failed or empty download leaves the document empty, which simply
    // yields "Not Found" for every title (loadHTML must not be given an
    // empty string).
    $doc = new DOMDocument();
    if ($html !== false && $html !== '') {
        $doc->loadHTML($html);
        libxml_clear_errors();
    }
    $xpath = new DOMXPath($doc);

    foreach ($product_titles as $i => $product_title) {
        $titleSpan = $title_spans[$i];

        if (!$xpath->evaluate("boolean($titleSpan)")) {
            echo "Not Found";
            continue;
        }

        $elements = $xpath->evaluate($titleSpan . "/following-sibling::div/span[@class='list_sale_price']");
        // evaluate() returns false (not null) when the expression is invalid.
        if (!($elements instanceof DOMNodeList)) {
            continue;
        }

        foreach ($elements as $element) {
            // One row per child node of the price <span>.
            foreach ($element->childNodes as $node) {
                // Keep digits, '-' and '.' only.
                $price1 = preg_replace('/[^0-9-.]/', '', $node->nodeValue);
                // NOTE(review): 'y' is a two-digit year; switch to 'Y-m-d' if the
                // `date` column is not a DATE type - confirm against the schema.
                $date = date('y-m-d');
                mysqli_stmt_bind_param($insert, 'sss', $product_title, $price1, $date);
                mysqli_stmt_execute($insert) or die(mysqli_stmt_error($insert));
            }
        }
    }
}
?>