将多个重复的“<image></image>”标记及其数据加载到同一个 images 表格单元格时出现问题。
XML
<posts>
<item>
<id>1</id>
<type>post</type>
<url>www.url.com/1</url>
<date>2016-06-15</date>
<image>some url/1xxx.jpg</image>
<image>some url/1yyy.jpg</image>
<image>some url/1zzz.jpg</image>
</item>
<item>
<id>2</id>
<type>post</type>
<url>www.url.com/2</url>
<date>2016-06-12</date>
<image>some url/2xxx.jpg</image>
<image>some url/2yyy.jpg</image>
<image>some url/2zzz.jpg</image>
<image>some url/2www.jpg</image>
</item>
<item>
<id>3</id>
<type>post</type>
<url>www.url.com/3</url>
<date>2016-06-12</date>
<image>some url/3fff.jpg</image>
</item>
</posts>
代码
现在它只会加载每个 <item> 中的一个 <image> 标记(而不是全部)。
-- Loads one `posts` row per <item> element of the XML file.
-- @image is a single user variable, so this statement can keep at most one of
-- an item's repeated <image> values in the `images` column.
LOAD XML LOCAL INFILE 'D:\\demo.xml'
REPLACE
INTO TABLE posts CHARACTER SET UTF8
ROWS IDENTIFIED BY '<item>'
(@id, @type, @url, @date, @image)
SET id = @id,
    type = @type,
    url = @url,
    -- Source dates look like 2016-06-15, so the format has to include the day (%d);
    -- with '%Y-%m' the day part is not parsed.
    date = STR_TO_DATE(@date, '%Y-%m-%d'),
    images = @image;
如何将所有重复的 <image> 标记存入 images 列(VARCHAR 或 TEXT 类型)?
答案 0 :(得分:0)
考虑使用 XSLT 转换 XML,将 item 和 images 规范化为一对多的两张表。下面使用 PHP 来运行 XSLT,但大多数通用语言都可以运行 XSLT 1.0 脚本,包括 PHP、Perl、Python、Java、C#、VB。具体来说,该转换会从 <item> 中移除 <image> 标记,保留相应的 <id>,并生成两组标记,以便分别上传到两个 MySQL 数据库表。
XSLT 脚本(另存为.xsl文件)
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <!-- Indent the result and discard whitespace-only text nodes from the input. -->
  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes"/>

  <!-- Root: copy <posts>, writing every <item> first and every item's <image> after them. -->
  <xsl:template match="/posts">
    <xsl:copy>
      <xsl:apply-templates select="item"/>
      <xsl:apply-templates select="item/image"/>
    </xsl:copy>
  </xsl:template>

  <!-- Each <image> is wrapped in an <images> element together with the id of its item. -->
  <xsl:template match="item/image">
    <images>
      <itemid><xsl:value-of select="ancestor::item/id"/></itemid>
      <xsl:copy-of select="."/>
    </images>
  </xsl:template>

  <!-- Each <item> is copied with all of its child elements except <image>. -->
  <xsl:template match="item">
    <xsl:copy>
      <xsl:copy-of select="*[not(local-name()='image')]"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
PHP 脚本
<?php
/**
 * Applies XSLT_Script.xsl to Original.xml and saves the result as Output.xml.
 *
 * All three files are resolved relative to this script's directory rather
 * than the current working directory. Every step is checked and a
 * RuntimeException is thrown on failure, so a failed load or transform never
 * results in an empty Output.xml being written.
 */
$cd = dirname(__FILE__);

// LOAD XML AND XSL FILES
$xml = new DOMDocument('1.0', 'UTF-8');
if (!$xml->load($cd . '/Original.xml')) {
    throw new RuntimeException('Could not load Original.xml');
}

$xslfile = new DOMDocument('1.0', 'UTF-8');
if (!$xslfile->load($cd . '/XSLT_Script.xsl')) {
    throw new RuntimeException('Could not load XSLT_Script.xsl');
}

// TRANSFORM XML with XSLT
$proc = new XSLTProcessor;
if (!$proc->importStyleSheet($xslfile)) {
    throw new RuntimeException('Could not import XSLT_Script.xsl as a stylesheet');
}

// transformToXML() yields a non-string value when the transformation fails.
$newXml = $proc->transformToXML($xml);
if (!is_string($newXml)) {
    throw new RuntimeException('XSLT transformation failed');
}

// SAVE TO FILE
if (file_put_contents($cd . '/Output.xml', $newXml) === false) {
    throw new RuntimeException('Could not write Output.xml');
}
?>
输出(每个 <image> 都带有所属 item 的 id)
<?xml version="1.0"?>
<posts>
<item>
<id>1</id>
<type>post</type>
<url>www.url.com/1</url>
<date>2016-06-15</date>
</item>
<item>
<id>2</id>
<type>post</type>
<url>www.url.com/2</url>
<date>2016-06-12</date>
</item>
<item>
<id>3</id>
<type>post</type>
<url>www.url.com/3</url>
<date>2016-06-12</date>
</item>
<images>
<itemid>1</itemid>
<image>some url/1xxx.jpg</image>
</images>
<images>
<itemid>1</itemid>
<image>some url/1yyy.jpg</image>
</images>
<images>
<itemid>1</itemid>
<image>some url/1zzz.jpg</image>
</images>
<images>
<itemid>2</itemid>
<image>some url/2xxx.jpg</image>
</images>
<images>
<itemid>2</itemid>
<image>some url/2yyy.jpg</image>
</images>
<images>
<itemid>2</itemid>
<image>some url/2zzz.jpg</image>
</images>
<images>
<itemid>2</itemid>
<image>some url/2www.jpg</image>
</images>
<images>
<itemid>3</itemid>
<image>some url/3fff.jpg</image>
</images>
</posts>
SQL (要上传的两个表格)
-- POSTS TABLE
-- Loads one `posts` row per <item> element of the transformed XML file.
LOAD XML LOCAL INFILE 'C:\\Path\\To\\Output.xml'
REPLACE
INTO TABLE posts CHARACTER SET UTF8
ROWS IDENTIFIED BY '<item>'
(@id, @type, @url, @date)
SET id = @id,
    type = @type,
    url = @url,
    -- Source dates look like 2016-06-15, so the format has to include the day (%d);
    -- with '%Y-%m' the day part is not parsed.
    date = STR_TO_DATE(@date, '%Y-%m-%d');
-- IMAGES TABLE
-- Loads one `images` row per <images> element: the owning item's id plus one image value.
LOAD XML LOCAL INFILE 'C:\\Path\\To\\Output.xml'
    REPLACE
    INTO TABLE images
    CHARACTER SET utf8
    ROWS IDENTIFIED BY '<images>'
    (@itemid, @image)
    SET itemid = @itemid,
        image = @image;