解析XML并插入到DB(如果存在),否则更新

时间:2017-07-26 21:25:28

标签: php xml xml-parsing sql-update sql-insert

我正在解析XML文件,如果id不存在,我想 INSERT XML元素节点,如果存在则更新记录...

到目前为止,这是我的代码:

<?php
header('Content-type: text/html; charset=UTF-8') ;
//connection to DB here..
//.... .. .. .. ..


// Create connection
$conn = new mysqli($servername, $dbuser, $password, $dbname,3306);
// Check connection
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
// Change character set to utf8
$conn->set_charset("utf8");
date_default_timezone_set('Europe/Athens');

$date_modified = strtotime("now");
$business_id = 54;
$xml_link = "https://www.mydomain.gr/test.xml";
$xml_link = $conn->real_escape_string($xml_link);
$xml_link = trim(stripslashes($xml_link));

我在这里更新记录“date_modified”..

$update_business_xml = $conn->query('UPDATE business_xml SET date_modified="' . $date_modified . '" WHERE business_id=54');

现在,在这段代码中,我得到了所有产品的ID,并将它们放在一个数组中

$count_errors = 0;
//query to find products ids
$query_ids = $conn->query('SELECT pid FROM products WHERE business_id=54');
$rows_ids = mysqli_num_rows($query_ids);
$count_id = 0;
if($rows_ids > 0) {
    while($exe_ids = mysqli_fetch_object($query_ids)) {
        $arr_ids[$count_id] = $exe_ids->pid;
        $count_id++;
    }
}

$reader = new XMLReader();
$reader->open($xml_link);           
while($reader->read()) {
    if($reader->nodeType == XMLReader::ELEMENT && $reader->name == 'product' ) {
        $product = new SimpleXMLElement($reader->readOuterXml());

        $pid = $product->id;
        $name = $product->name;
        $name = mb_strtolower($name,'UTF-8');
        $mpn = $product->mpn;
        $ean = $product->ean;
        $sku = $product->sku;
        $link = $product->link;
        $price = $product->price;
        $category_id =  $product->category->attributes();
        $category_path = $product->category;
        $category_path = mb_strtolower($category_path,'UTF-8');
        $image = $product->image;
        $availability = $product->availability;
        $size = $product->size;
        $size = mb_strtolower($size,'UTF-8');
        $color = $product->color;
        $color = mb_strtolower($color,'UTF-8');
        $weight = $product->weight;
        $description = $product->description;
        $manufacturer = $product->manufacturer;
        $manufacturer = trim($manufacturer);
        $instock = "Y";
        $product_image = $image;
        $check_product_url = $link;

        $exist_pids = 0;
        if(!empty($pid) || !empty($image) || !empty($price) || !empty($name) || !empty($link) || !empty($manufacturer)) {
            if($category_id == 613 || $category_id == 604 || $category_id == 635) {

我在XML中有一些我不希望它们在我的数据库中的类别,所以我在这里得到该类别的属性,并检查该产品是否是其中之一,只需将此记录的状态更新到我的数据库

                $update_business_xml = $conn->query('UPDATE products SET status=0 WHERE business_id="' . $business_id . '" AND pid= "' . $pid . '"');
                $count_errors++;
            }
            else {
                $status = 1;
                $date = date('d-m-Y H:i:s'); //when insert a pr
                $date_modified = strtotime("now"); //when modify a pr
                $insert_business_xml = $conn->query('INSERT INTO products (business_id,pid,name,category,product_link,price,size,color,weight,description,manufacturer,mpn,ean,image,sku,instock,availability,status,date_added) VALUES("'.$business_id.'", 
                "' . mysqli_real_escape_string($conn,stripslashes($pid)) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes($name)) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim($category_path))) . '", 
                "' . $check_product_url . '", 
                "' . mysqli_real_escape_string($conn,stripslashes($price)) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($size)))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($color)))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim($weight))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim($description))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($manufacturer)))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim($mpn))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim($ean))) . '", 
                "' . $product_image . '", 
                "' . mysqli_real_escape_string($conn,stripslashes(trim($sku))) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes($instock)) . '", 
                "' . mysqli_real_escape_string($conn,stripslashes($availability)) . '", 
                "' . $status . '", "' . $date . '") ON DUPLICATE KEY UPDATE
                "business_id='.$business_id.'", 
                "pid=' . mysqli_real_escape_string($conn,stripslashes($pid)) . '", 
                "name=' . mysqli_real_escape_string($conn,stripslashes($name)) . '", 
                "category=' . mysqli_real_escape_string($conn,stripslashes(trim($category_path))) . '", 
                "product_link=' . $check_product_url . '", 
                "price=' . mysqli_real_escape_string($conn,stripslashes($price)) . '", 
                "size=' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($size)))) . '", 
                "color=' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($color)))) . '", 
                "weight=' . mysqli_real_escape_string($conn,stripslashes(trim($weight))) . '", 
                "description=' . mysqli_real_escape_string($conn,stripslashes(trim($description))) . '", 
                "manufacturer=' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($manufacturer)))) . '", 
                "mpn=' . mysqli_real_escape_string($conn,stripslashes(trim($mpn))) . '", 
                "ean=' . mysqli_real_escape_string($conn,stripslashes(trim($ean))) . '", 
                "image=' . $product_image . '", 
                "sku=' . mysqli_real_escape_string($conn,stripslashes(trim($sku))) . '", 
                "instock=' . mysqli_real_escape_string($conn,stripslashes($instock)) . '", 
                "availability=' . mysqli_real_escape_string($conn,stripslashes($availability)) . '", 
                "status=' . $status . '", 
                "date_modified=' . $date_modified . '"');

在上面的代码中,我使用INSERT INTO ... ON DUPLICATE KEY UPDATE以检查id是否存在,只更新该记录的所有值。如果id不存在,则INSERT这个。 。 在我告诉我的代码开始时,我得到了一个数组中的所有id,所以在这里我正在删除已找到的id ..

                //erase from arr_ids
                if(($key = array_search($pid, $arr_ids)) !== false) {
                    unset($arr_ids[$key]);
                }
            }
        }
        else {
            $update_business_xml = $conn->query('UPDATE products SET status=0, date_modified="' . $date_modified . '" WHERE business_id="' . $business_id . '" AND pid= "' . $pid . '"');
            $count_errors++;
        }
    } //reader nodeType
} //end while loop
$reader->close();

最后但并非最不重要的是,在这里我也更新了记录,其余的所有其余的数据都留在了数组中......这意味着这些ID,不存在或者没有在XML中列出任何更多。

foreach($arr_ids as $id) {
    $update_business_xml = $conn->query('UPDATE products SET status=0 WHERE business_id="' . $business_id . '" AND pid= "' . $id . '"');
}

最后,我有一条简单的消息,以便查看有多少错误。通过说错误,我的意思是有多少XML记录有空元素(名称,价格,链接,图像...等)并且只是回应......

//var_dump($arr_ids);
$insert_messages = "Your XML file has been updated successfully! We found <strong>" . $count_errors . "</strong> errors. In case errors found, please check your dashboard!";
echo $insert_messages;
$conn->close();
?>

所有这些代码都在一个php文件中,以 cronjob 的形式运行! 现在问题和我的问题是,我没有看到UPDATE正常运行或根本没有运行,因为我的数据库中的date_modified总是NULL,这是初始化值。我错过了什么? (通常我可以做些什么来修复整个代码,如果这里有多个错误?)

提前致谢

带有一个产品的XML样本(希腊语XML)

<mystore>
<created_at>2017-07-26 16:01:20</created_at>
<products>
<product>
<id>9307</id>
<name>
<![CDATA[ Minimum ανδρικό t-shirt φλάμα Percy ivory ]]>
</name>
<link>
<![CDATA[
https://www.mydomain.gr/andrika-rouxa/tshirts-andrikes-mployzes/minimum-andriko-t-shirt-percy-ivory.html
]]>
</link>
<image>
<![CDATA[
https://www.mydomain.gr/images/detailed/51/minimum-andriko-t-shirt-percy-122690105_(1).jpg
]]>
</image>
<sku>
<![CDATA[ 122690105-wh ]]>
</sku>
<mpn>
<![CDATA[ 122690105-wh ]]>
</mpn>
<category id="30">
<![CDATA[ ΑΝΔΡΙΚΑ > T- shirts ]]>
</category>
<price>27.30</price>
<description>
<![CDATA[
<ul><li>χρώμα ελεφαντόδοντου</li><li>στρογγυλή λαιμόκοψη</li><li>στρογγυλεμένο και μακρύτερο πίσω μέρος</li><li>regular fit</li><li>100% cotton</li></ul>
]]>
</description>
<instock>Y</instock>
<availability>Σε απόθεμα</availability>
<manufacturer>
<![CDATA[ Minimum ]]>
</manufacturer>
<size>L,XL</size>
<sex>
<![CDATA[ Άνδρας ]]>
</sex>
</product>
</products>
</mystore>

date_modified是我的数据库中的varchar(128) - utf8_general_ci 我不认为这是一个问题,对吧?

1 个答案:

答案 0 :(得分:1)

考虑使用 products_temp 表,产品的确切结构,但仅用于存储XML数据。从那里运行需要追加和更新查询以从临时表迁移到最终表。

如果这里有任何外卖,请使用parameterized queries,以避免引用封装和变量连接,以获得更清晰,更安全,可维护的代码。

XML数据处理(在循环中附加查询)

// CLEAN OUT TEMP TABLE
$sql = 'DELETE FROM products_temp';
$delete_xml = $conn->query($sql);

// PREPARED STATEMENT
$sql = 'INSERT INTO products_temp (business_id, pid, `name`, `category`, product_link, price,
                                   size, color, weight, `description`, manufacturer, mpn, ean,
                                   image, sku, instock, availability, `status`, date_added) 
        VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)';

// APPEND ALL RAW XML DATA INTO TEMP TABLE (IN LOOP, WITHOUT INNER IF LOGIC)
//...same xml objects

while($reader->read()) {
    if($reader->nodeType == XMLReader::ELEMENT && $reader->name == 'product') {
        //...same xml variables
        $product = new SimpleXMLElement($reader->readOuterXml());
        $stmt = $conn->prepare($sql);

        $stmt->bind_param("sssssssssssssssssss", 
                          mysqli_real_escape_string($conn,stripslashes($pid)), 
                          mysqli_real_escape_string($conn,stripslashes($name)), 
                          mysqli_real_escape_string($conn,stripslashes(trim($category_path))), 
                          $check_product_url, 
                          mysqli_real_escape_string($conn,stripslashes($price)), 
                          mysqli_real_escape_string($conn,stripslashes(trim(strtolower($size)))), 
                          mysqli_real_escape_string($conn,stripslashes(trim(strtolower($color)))), 
                          mysqli_real_escape_string($conn,stripslashes(trim($weight))), 
                          mysqli_real_escape_string($conn,stripslashes(trim($description))), 
                          mysqli_real_escape_string($conn,stripslashes(trim(strtolower($manufacturer)))), 
                          mysqli_real_escape_string($conn,stripslashes(trim($mpn))), 
                          mysqli_real_escape_string($conn,stripslashes(trim($ean))), 
                          $product_image, 
                          mysqli_real_escape_string($conn,stripslashes(trim($sku))), 
                          mysqli_real_escape_string($conn,stripslashes($instock)), 
                          mysqli_real_escape_string($conn,stripslashes($availability)), 
                          $status, 
                          $date);

        $stmt->execute();
    }
}

临时数据处理(外部循环,每次调用一次,避免使用数组和其他foreach

使用WHERE NOT EXISTS

// APPEND ONLY NEW TEMP PRODUCTS WITH RELEVANT INFO AND NOT IN SPECIAL CATEGS INTO PRODUCTS
$sql = 'INSERT INTO products (business_id, pid, `name`, `categor`y, product_link, price,
                              size, color, weight, `description`, manufacturer, mpn, ean,
                              image, sku, instock, availability, `status`, date_added) 
        SELECT t.business_id, t.pid, t.name, t.category, t.product_link, t.price,
               t.size, t.color, t.weight, t.description, t.manufacturer, t.mpn, t.ean,
               t.image, t.sku, t.instock, t.availability, t.status, t.date_added
        FROM products_temp t
        WHERE NOT EXISTS (SELECT 1 FROM products sub 
                          WHERE sub.p_id = t.p_id AND sub.business_id = t.business_id)
          AND t.image IS NOT NULL AND t.price IS NOT NULL AND t.name IS NOT NULL 
          AND t.link IS NOT NULL AND t.manufacturer IS NOT NULL
          AND t.category_id NOT IN (604, 613, 635)';
$insert_business_xml = $conn->query($sql);

使用UPDATE INNER JOIN

// UPDATE MATCHED TEMP PROUCTS WITH MISSING RELEVANT INFO OR IN SPECIAL CATEGS (I.E., ERRORS)
$sql = 'UPDATE products p INNER JOIN products_temp t
                           ON p.p_id = t.p_id AND p.business_id = t.business_id
        SET p.status=0, p.date_modified = ?
        WHERE t.image IS NULL OR t.price IS NULL OR t.name IS NULL 
           OR t.link IS NULL OR t.manufacturer IS NULL
           OR t.category IN (604, 613, 635)';

$stmt = $conn->prepare($sql);
$stmt->bind_param("s", $date_modified);

$stmt->execute();
$count_errors = $mysqli->affected_rows;     // ERRORS FOR MESSAGE AT END


// UPDATE EXISTING MATCHED TEMP PRODUCTS WITH RELEVANT INFO AND NOT IN SPECIAL CATEGS
$sql = 'UPDATE products p INNER JOIN products_temp t
                           ON p.p_id = t.p_id AND p.business_id = t.business_id
        SET p.business_id = t.business_id, p.name = t.name, p.category = t.category, 
            p.product_link = t.product_link, p.price = t.price, p.size = t.size, 
            p.color = t.color, p.weight = t.weight, p.description = t.description, 
            p.manufacturer = t.manufacturer, p.mpne = t.mpn, p.ean = t.ean,
            p.image = t.image, p.sku = t.sku, p.instock = t.instock, 
            p.availability = t.availability, p.status = t.status, p.date_added = t.date_added
        WHERE t.image IS NOT NULL AND t.price IS NOT NULL AND t.name IS NOT NULL 
          AND t.link IS NOT NULL AND t.manufacturer IS NOT NULL
          AND t.category_id NOT IN (604, 613, 635)';
$update_business_xml = $conn->query($sql);


// UPDATE EXISTING NON-MATCHED TEMP PRODUCTS
$sql = 'UPDATE products p SET p.status = 0
        WHERE NOT EXISTS (SELECT 1 FROM products_temp sub 
                          WHERE sub.p_id = p.p_id AND sub.business_id = p.business_id)';
$update_business_xml = $conn->query($sql);

注意:当然没有实际数据和数据库,所有这些都是未经测试的。请调整任何被忽略的语法并集成到更大的代码库中。