我有一个包含空元素标记的XML:
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
我想避免把这种空的 manufacturer 元素写入数据库,但一直没有成功。下面是我的完整代码,它作为 cronjob 运行,用来更新我的记录。我看过这里其他用户的一些相关帖子,但还是不明白自己漏掉了什么,也没有找到解决办法。虽然我已经在检查(或者至少我以为自己在检查)manufacturer 元素是否为空、为空就不写入数据库,但数据库里仍然出现了 manufacturer 为空的记录。能否解释一下原因,或者给出解决办法?谢谢!
<?php
/**
 * Cron job: synchronise the `products` table with each active business's XML feed.
 *
 * For every row selected from `business_xml` the script
 *   1. stamps `date_modified` on the feed row,
 *   2. loads the pids already stored for that business,
 *   3. streams the feed with XMLReader and, for each <product> element,
 *      - sets status=0 (and counts an error) when a required field is blank
 *        or the category is one of the forbidden ids,
 *      - otherwise UPDATEs the row when the pid is already known, or INSERTs it.
 *
 * Changes compared with the previous version:
 *   - every value taken from the feed goes through a prepared statement
 *     (the old UPDATE concatenated raw feed text into SQL);
 *   - the UPDATE now has the comma that was missing before `date_modified`,
 *     so existing products are really updated;
 *   - feed values are cast to string and stripped of surrounding whitespace
 *     (including U+00A0) before the "required field" check, so elements such
 *     as <manufacturer><![CDATA[ ]]></manufacturer> are treated as blank;
 *   - the feed URL is only trimmed; it is never used in SQL, and the old
 *     real_escape_string()/stripslashes() round trip could corrupt it;
 *   - values are no longer passed through stripslashes() before storage;
 *   - the known-pid list is rebuilt per feed and looked up by key.
 */
header('Content-type: text/html; charset=UTF-8');

//connection info here

// Create connection
$conn = new mysqli($servername, $dbuser, $password, $dbname, 3306);
// Check connection
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
// Change character set to utf8
$conn->set_charset("utf8");
date_default_timezone_set('Europe/Athens');

// Returns the text of an XML node with surrounding whitespace removed.
// Plain trim() does not strip U+00A0 (no-break space), which feeds often
// contain, so a UTF-8 aware pattern is tried first; preg_replace() yields
// null on invalid UTF-8, in which case plain trim() is used instead.
$xmlText = function ($node) {
    $raw = (string) $node;
    $clean = preg_replace('/^[\s\x{00A0}]+|[\s\x{00A0}]+$/u', '', $raw);

    return $clean === null ? trim($raw) : $clean;
};

// Prepares a statement or stops the job with the driver's error message.
$prepareOrDie = function ($sql) use ($conn) {
    $stmt = $conn->prepare($sql);
    if ($stmt === false) {
        die("Prepare failed: " . $conn->error);
    }

    return $stmt;
};

// Statements are prepared once and bound by reference: assigning new values
// to the bound variables below and calling execute() sends those new values.
$touchFeed = $prepareOrDie('UPDATE business_xml SET date_modified=? WHERE business_id=?');
$touchFeed->bind_param('ss', $date_modified, $business_id);

$disableProduct = $prepareOrDie('UPDATE products SET status=0 WHERE business_id=? AND pid=?');
$disableProduct->bind_param('ss', $business_id, $pid);

$updateProduct = $prepareOrDie(
    'UPDATE products SET name=?, price=?, product_link=?, image=?, size=?, color=?, date_modified=?'
    . ' WHERE business_id=? AND pid=?'
);
$updateProduct->bind_param(
    'sssssssss',
    $name, $price, $link, $image, $size, $color, $date_modified, $business_id, $pid
);

$insertProduct = $prepareOrDie(
    'INSERT INTO products (business_id,pid,name,category,product_link,price,size,color,weight,'
    . 'description,manufacturer,mpn,ean,image,sku,instock,availability,status,date_added)'
    . ' VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
);
$insertProduct->bind_param(
    str_repeat('s', 19),
    $business_id, $pid, $name, $category_path, $link, $price, $size, $color, $weight,
    $description, $manufacturer_lc, $mpn, $ean, $image, $sku, $instock, $availability, $status, $date
);

$feeds = $conn->query("SELECT * FROM business_xml WHERE activate=1 AND business_id=54"); //business_id = 54
if ($feeds === false) {
    die("Query failed: " . $conn->error);
}

while ($exe_xml = $feeds->fetch_object()) {
    // NOTE: the original script had an hourly throttle here (skip the feed when
    // date_modified is less than 3600 seconds old). It was commented out, so
    // every selected feed is processed on every run.

    $business_id = $exe_xml->business_id;

    //UPDATE modified date
    $date_modified = strtotime("now");
    $touchFeed->execute();

    // The link is handed to XMLReader, not to SQL, so trimming is all it needs.
    $product_xml_link = trim((string) $exe_xml->xml_link);
    if ($product_xml_link === '') {
        continue;
    }

    $count_errors = 0;

    // Collect the pids already stored for this business: known pid => UPDATE, else INSERT.
    $known_pids = array();
    $query_ids = $conn->query('SELECT pid FROM products WHERE business_id=' . (int) $business_id);
    if ($query_ids === false) {
        // Without this list every product would be INSERTed again, so skip the feed.
        echo "Could not read existing products: " . htmlspecialchars($conn->error);
        continue;
    }
    while ($exe_ids = $query_ids->fetch_object()) {
        $known_pids[(string) $exe_ids->pid] = true;
    }
    $query_ids->free();

    $reader = new XMLReader();
    if (!$reader->open($product_xml_link)) {
        echo "Could not open XML feed: " . htmlspecialchars($product_xml_link);
        continue;
    }

    while ($reader->read()) {
        if ($reader->nodeType !== XMLReader::ELEMENT || $reader->name !== 'product') {
            continue;
        }

        //For each node to type "product"
        try {
            $product = new SimpleXMLElement($reader->readOuterXml());
        } catch (Exception $e) {
            // The <product> fragment could not be parsed on its own.
            $count_errors++;
            continue;
        }

        $pid = $xmlText($product->id);
        $name = mb_strtolower($xmlText($product->name), 'UTF-8');
        $mpn = $xmlText($product->mpn);
        $ean = $xmlText($product->ean);
        $sku = $xmlText($product->sku);
        $link = $xmlText($product->link);
        $price = $xmlText($product->price);
        // Kept as the attribute collection and compared loosely, exactly as before.
        // NOTE(review): this relies on the category id being the first attribute
        // of <category> - confirm against the feed format.
        $category_id = $product->category->attributes();
        $category_path = mb_strtolower($xmlText($product->category), 'UTF-8');
        $image = $xmlText($product->image);
        $availability = $xmlText($product->availability);
        $size = mb_strtolower($xmlText($product->size), 'UTF-8');
        $color = mb_strtolower($xmlText($product->color), 'UTF-8');
        $weight = $xmlText($product->weight);
        $description = $xmlText($product->description);
        $manufacturer = $xmlText($product->manufacturer);

        // Required fields are compared with '' rather than tested with empty():
        // the values are plain trimmed strings now, and empty() would also
        // reject a literal "0".
        $missing_required = $pid === '' || $image === '' || $price === ''
            || $name === '' || $link === '' || $manufacturer === '';
        //forbidden products
        $forbidden_category = $category_id == 613 || $category_id == 604 || $category_id == 635;

        if ($missing_required || $forbidden_category) {
            //update status for this pid product: a required value is missing or the category is forbidden
            $disableProduct->execute();
            $count_errors++;
            continue;
        }

        if (isset($known_pids[$pid])) {
            $date_modified = strtotime("now");
            if (!$updateProduct->execute()) {
                $count_errors++;
            }
            continue;
        }

        $manufacturer_lc = mb_strtolower($manufacturer, 'UTF-8');
        $instock = "Y";
        $status = 1;
        $date = date('d-m-Y H:i:s');
        if ($insertProduct->execute()) {
            // A pid repeated later in the same feed becomes an UPDATE, not a second INSERT.
            $known_pids[$pid] = true;
        } else {
            $count_errors++;
        }
    } //end while loop over XML nodes
    $reader->close();

    $insert_messages = "Your XML file has been updated successfully! We found <strong>" . $count_errors . "</strong> errors. In case errors found, please check your dashboard!";
    echo $insert_messages;
} //end while loop over feeds
?>
答案 0 :(得分:0)
实际上“manufacturer”元素不是空的。它包含三个节点。
// Sample document: a <manufacturer> element whose only "content" is a CDATA
// section holding a single space, surrounded by line breaks.
$xml = <<<'XML'
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
XML;
$dom = new DOMDocument();
$dom->loadXML($xml);
// Walk the direct children of the root element and print each node's class and text.
$children = $dom->documentElement->childNodes;
for ($i = 0; $i < $children->length; $i++) {
    $node = $children->item($i);
    var_dump(get_class($node), $node->textContent);
}
输出:
string(7) "DOMText"
string(1) "
"
string(15) "DOMCdataSection"
string(1) " "
string(7) "DOMText"
string(1) "
"
这些文本节点是空白节点,只包含换行符。DOM 中的一切都是节点,就连属性值也是属性节点的文本子节点。您可以让 DOM 不生成这些空白节点:在解析之前把 DOMDocument 的 preserveWhiteSpace 属性设置为 FALSE。
// Same dump as before, but whitespace-only text nodes are dropped while parsing.
$dom = new DOMDocument();
$dom->preserveWhiteSpace = false; // must be set before the document is loaded
$dom->loadXML($xml);
$children = $dom->documentElement->childNodes;
for ($i = 0; $i < $children->length; $i++) {
    $node = $children->item($i);
    var_dump(get_class($node), $node->textContent);
}
输出:
string(15) "DOMCdataSection"
string(1) " "
只剩下CDATA部分,它包含一个空格。您可以对文本内容使用trim(),并检查结果是否为空字符串,以判断它是否为“空”。您可以直接在“manufacturer”元素节点上执行此操作:
// The element counts as "empty" when its combined text content is only whitespace.
$dom = new DOMDocument();
$dom->loadXML($xml);
$manufacturerText = trim($dom->documentElement->textContent);
var_dump($manufacturerText === '');
输出:
bool(true)
另一种方法是使用 XPath 函数 normalize-space()。它会把每一段连续的空白字符替换为单个空格,并去掉首尾的空白。因此,要只获取文本内容不全是空白的节点:
// Two <manufacturer> elements: the first holds only whitespace, the second real text.
$xml = <<<'XML'
<list>
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
<manufacturer>
<![CDATA[ not only spaces ]]>
</manufacturer>
</list>
XML;
$dom = new DOMDocument();
$dom->loadXML($xml);
$finder = new DOMXPath($dom);
// Select only the elements whose whitespace-normalised text is not empty.
$nonBlankManufacturers = '//manufacturer[normalize-space(.) != ""]';
$matches = $finder->evaluate($nonBlankManufacturers);
foreach ($matches as $node) {
    $text = trim($node->textContent);
    var_dump($text);
}
输出:
string(15) "not only spaces"