检查XML元素标记是否为空

时间:2017-07-20 11:45:20

标签: xml xml-parsing

我有一个包含空元素标记的XML:

<manufacturer>
<![CDATA[ ]]>
</manufacturer>

我想避免把这个空元素写入数据库,但没有成功。下面是我的完整代码,它作为 cronjob 运行,用来更新我的记录。我看过这里其他用户的一些相关帖子,但仍然不明白自己漏掉了什么,也没有找到解决方案。尽管我已经做了检查(至少我认为做了):如果 manufacturer 元素为空,就不把它写入数据库,但我的数据库中仍然存在 manufacturer 为空的记录。你们能给我一些解释,或者帮我解决这个问题吗?谢谢!

<?php
/**
 * Cron job: synchronises the `products` table with the XML product feed of
 * every active row in `business_xml`.
 *
 * For each <product> element of a feed:
 *   - if a required value (id, image, price, name, link, manufacturer) is
 *     blank, or the product belongs to a forbidden category, the matching
 *     product row is switched off (status=0) and counted as an error;
 *   - otherwise the product row is updated when its pid is already stored
 *     for the business, or inserted when it is not.
 *
 * Every value read from the feed is untrusted, so all SQL goes through
 * prepared statements instead of string concatenation.
 *
 * Note: an hourly throttle based on `business_xml.date_modified` used to
 * guard the feed processing; it is currently disabled, so every run
 * processes every selected feed.
 */
header('Content-type: text/html; charset=UTF-8');
//connection info here

// Create connection
$conn = new mysqli($servername, $dbuser, $password, $dbname, 3306);
// Check connection
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
// Change character set to utf8
$conn->set_charset("utf8");
date_default_timezone_set('Europe/Athens');

// Category ids whose products must never be published.
$forbidden_categories = array(613, 604, 635);

// Trimmed text content of a SimpleXML node. Returns '' for a missing element
// and for elements holding only whitespace or a blank CDATA section, which
// empty() on the SimpleXMLElement object itself does not reliably detect.
$text_of = function ($node) {
    return trim((string) $node);
};

// Statements are prepared once and re-executed for every feed / product.
$insert_columns = 'business_id,pid,name,category,product_link,price,size,color,weight,description,'
    . 'manufacturer,mpn,ean,image,sku,instock,availability,status,date_added';
$insert_column_count = 19;

$touch_feed      = $conn->prepare('UPDATE business_xml SET date_modified=? WHERE business_id=?');
$select_pids     = $conn->prepare('SELECT pid FROM products WHERE business_id=?');
$disable_product = $conn->prepare('UPDATE products SET status=0 WHERE business_id=? AND pid=?');
$update_product  = $conn->prepare(
    'UPDATE products SET name=?, price=?, product_link=?, image=?, size=?, color=?, date_modified=?'
    . ' WHERE business_id=? AND pid=?'
);
$insert_product  = $conn->prepare(
    'INSERT INTO products (' . $insert_columns . ') VALUES ('
    . implode(',', array_fill(0, $insert_column_count, '?')) . ')'
);
if (!$touch_feed || !$select_pids || !$disable_product || !$update_product || !$insert_product) {
    die("Prepare failed: " . $conn->error);
}

$feeds = $conn->query("SELECT * FROM business_xml WHERE activate=1 AND business_id=54"); //business_id = 54
if (!$feeds) {
    die("Query failed: " . $conn->error);
}

while ($feed = $feeds->fetch_object()) {
    $business_id = $feed->business_id;
    // The link is only used to open the feed, never placed in SQL, so it
    // needs trimming but no SQL escaping.
    $product_xml_link = trim((string) $feed->xml_link);

    //UPDATE modified date
    $date_modified = time();
    $touch_feed->bind_param('ss', $date_modified, $business_id);
    $touch_feed->execute();

    if ($product_xml_link === '') {
        continue;
    }

    // Set of pids already stored for this business (pid => true). Rebuilt for
    // every feed row so ids never leak from one business to the next.
    $known_pids = array();
    $select_pids->bind_param('s', $business_id);
    $select_pids->execute();
    $select_pids->bind_result($known_pid);
    while ($select_pids->fetch()) {
        $known_pids[(string) $known_pid] = true;
    }
    $select_pids->free_result();

    $reader = new XMLReader();
    if (!$reader->open($product_xml_link)) {
        echo "Unable to open the XML feed.";
        continue;
    }

    $count_errors = 0;
    while ($reader->read()) {
        if ($reader->nodeType != XMLReader::ELEMENT || $reader->name != 'product') {
            continue;
        }

        //For each node to type "product"
        $product = new SimpleXMLElement($reader->readOuterXml());

        $pid           = $text_of($product->id);
        $name          = mb_strtolower($text_of($product->name), 'UTF-8');
        $mpn           = $text_of($product->mpn);
        $ean           = $text_of($product->ean);
        $sku           = $text_of($product->sku);
        $link          = $text_of($product->link);
        $price         = $text_of($product->price);
        // Casting the attribute list yields the value of the first attribute
        // of <category>, which is what the category id comparison relies on.
        $category_id   = (int) (string) $product->category->attributes();
        $category_path = mb_strtolower($text_of($product->category), 'UTF-8');
        $image         = $text_of($product->image);
        $availability  = $text_of($product->availability);
        $size          = mb_strtolower($text_of($product->size), 'UTF-8');
        $color         = mb_strtolower($text_of($product->color), 'UTF-8');
        $weight        = $text_of($product->weight);
        $description   = $text_of($product->description);
        $manufacturer  = mb_strtolower($text_of($product->manufacturer), 'UTF-8');

        $missing_required = $pid === '' || $image === '' || $price === ''
            || $name === '' || $link === '' || $manufacturer === '';
        $is_forbidden = in_array($category_id, $forbidden_categories, true);

        if ($missing_required || $is_forbidden) {
            // A required value is blank or the category is forbidden:
            // switch the product off instead of storing it.
            $disable_product->bind_param('ss', $business_id, $pid);
            $disable_product->execute();
            $count_errors++;
            continue;
        }

        if (isset($known_pids[$pid])) {
            $date_modified = time();
            $update_product->bind_param(
                'sssssssss',
                $name,
                $price,
                $link,
                $image,
                $size,
                $color,
                $date_modified,
                $business_id,
                $pid
            );
            $update_product->execute();
        } else {
            $instock = "Y";
            $status = 1;
            $date_added = date('d-m-Y H:i:s');
            $insert_product->bind_param(
                str_repeat('s', $insert_column_count),
                $business_id,
                $pid,
                $name,
                $category_path,
                $link,
                $price,
                $size,
                $color,
                $weight,
                $description,
                $manufacturer,
                $mpn,
                $ean,
                $image,
                $sku,
                $instock,
                $availability,
                $status,
                $date_added
            );
            $insert_product->execute();
            // A pid repeated later in the same feed is then updated rather
            // than inserted a second time.
            $known_pids[$pid] = true;
        }
    } //end while $reader->read()
    $reader->close();

    $insert_messages = "Your XML file has been updated successfully! We found <strong>" . $count_errors . "</strong> errors. In case errors found, please check your dashboard!";
    echo $insert_messages;
} //end while feeds

$feeds->free();
$touch_feed->close();
$select_pids->close();
$disable_product->close();
$update_product->close();
$insert_product->close();
$conn->close();
?>

1 个答案:

答案 0 :(得分:0)

实际上“manufacturer”元素不是空的。它包含三个节点。

// Sample document: a <manufacturer> element whose only content is a CDATA
// section holding one space, surrounded by line breaks.
$xml = <<<'XML'
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
XML;

// Parse with default settings and dump the class and text of every direct
// child node of the root element.
$dom = new DOMDocument();
$dom->loadXML($xml);
$children = $dom->documentElement->childNodes;
for ($i = 0; $i < $children->length; $i++) {
    $node = $children->item($i);
    var_dump(get_class($node), $node->textContent);
}

输出:

string(7) "DOMText"
string(1) "
"
string(15) "DOMCdataSection"
string(1) " "
string(7) "DOMText"
string(1) "
"

这两个文本节点是空白节点,只包含换行符。DOM 中的一切都是节点,就连属性值也是属性节点的文本子节点。您可以让 DOM 在解析时不保留这些空白节点:在解析之前将 preserveWhiteSpace 属性设置为 FALSE。

// Same dump as before, but whitespace-only text nodes are dropped because
// preserveWhiteSpace is switched off before the document is loaded.
$dom = new DOMDocument();
$dom->preserveWhiteSpace = false;
$dom->loadXML($xml);
$children = $dom->documentElement->childNodes;
for ($i = 0; $i < $children->length; $i++) {
    $node = $children->item($i);
    var_dump(get_class($node), $node->textContent);
}

输出:

string(15) "DOMCdataSection"
string(1) " "

只剩下 CDATA 部分,它包含一个空格。您可以对文本内容使用 trim(),并验证结果是否为空字符串,以此判断它是否为“空”。您可以直接在“manufacturer”元素节点上执行此操作:

// The root element counts as "empty" when its combined text content is
// nothing but whitespace.
$dom = new DOMDocument();
$dom->loadXML($xml);
$rootText = $dom->documentElement->textContent;
var_dump('' === trim($rootText));

输出:

bool(true)

另一种方法是使用 XPath 函数 normalize-space()。它会去掉首尾的空白,并把内部每一组连续的空白替换为单个空格。因此,要只获取那些内容不全是空白的节点的文本内容:

// Sample list with one blank and one non-blank <manufacturer> element.
$xml = <<<'XML'
<list>
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
<manufacturer>
<![CDATA[ not only spaces ]]>
</manufacturer>
</list>
XML;

$dom = new DOMDocument();
$dom->loadXML($xml);
$finder = new DOMXPath($dom);

// Select only the <manufacturer> elements whose whitespace-normalised text
// is not the empty string, then dump their trimmed text.
$nonBlank = $finder->evaluate('//manufacturer[normalize-space(.) != ""]');
for ($i = 0; $i < $nonBlank->length; $i++) {
    var_dump(trim($nonBlank->item($i)->textContent));
}

输出:

string(15) "not only spaces"