使用PYTHON将CSV文件转换为XML文件

时间:2019-04-03 20:18:40

标签: python xml csv

我想用python将csv文件转换为xml文件。我想将csv文件中的相同ID分组在一起,然后将csv转换为xml(请参见所需的输出)。缩进、循环以及将csv分组为xml,看起来比我预想的要复杂一些。感谢所有帮助。

My CSV file:

id,x1,y1,z1,x2,y2,z2,c1,R
a1,1.3,2.1,3.6,4.5,5.1,6.8,B,7.3
b2,1.1,2.1,3.1,4.1,5.1,6.1,G,7.1
c1,2.1,3.1,4.1,5.1,2.1,7.1,G,8.1
a1,2.2,3.2,4.2,5.2,6.2,7.2,S,8.2
b2,4.1,5.1,2.1,7.1,8.1,9.1,S,2.5
b2,3.6,4.5,5.1,6.3,7.4,8.2,G,3.1
c2,6.1,7.1,8.1,9.1,2.1,11.1,S,3.2
c1,1.5,1.5,1.5,1.5,1.5,1.5,A,1.5


my code:

import itertools
import csv
import os

csvFile = r'C:\Users\Desktop\test XML\csvfile.csv'
xmlFile = r'C:\Users\Desktop\test XML\myData.xml'

csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')

# XML prolog and root element, followed by a single <Roughness-Profile>
# opening tag that is written once, before any row is processed.
xmlData.write('<?xml version="1.0" encoding="UTF-8"?>\n'
              '<Roughness-Profiles xmlns="http://WKI/Roughness-Profiles/1">\n')
xmlData.write(' <Roughness-Profile>\n')

for position, row in enumerate(csvData):
    if position == 0:
        # Header row: the column names become the tag names, with spaces
        # replaced by underscores.
        tags = [name.replace(' ', '_') for name in row]
        continue

    # Each data row gets its own <surfaces>/<surface> wrapper and one child
    # element per column (the id column included).
    xmlData.write('  <surfaces>\n   <surface>\n')
    for column, tag in enumerate(tags):
        xmlData.write('    <{0}>{1}</{0}>\n'.format(tag, row[column]))

    # NOTE: </Roughness-Profile> is emitted for every row even though the
    # opening tag above is written only once.
    xmlData.write('   </surface>\n  </surfaces>\n </Roughness-Profile>\n')

xmlData.write('</Roughness-Profiles>\n')
xmlData.close()

我的xml输出:

<?xml version="1.0" encoding="UTF-8"?>
<Roughness-Profiles xmlns="http://WKI/Roughness-Profiles/1">
 <Roughness-Profile>
  <surfaces>
   <surface>
    <id>a1</id>
    <x1>1.3</x1>
    <y1>2.1</y1>
    <z1>3.6</z1>
    <x2>4.5</x2>
    <y2>5.1</y2>
    <z2>6.8</z2>
    <c1>B</c1>
    <R>7.3</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>b2</id>
    <x1>1.1</x1>
    <y1>2.1</y1>
    <z1>3.1</z1>
    <x2>4.1</x2>
    <y2>5.1</y2>
    <z2>6.1</z2>
    <c1>G</c1>
    <R>7.1</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>c1</id>
    <x1>2.1</x1>
    <y1>3.1</y1>
    <z1>4.1</z1>
    <x2>5.1</x2>
    <y2>2.1</y2>
    <z2>7.1</z2>
    <c1>G</c1>
    <R>8.1</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>a1</id>
    <x1>2.2</x1>
    <y1>3.2</y1>
    <z1>4.2</z1>
    <x2>5.2</x2>
    <y2>6.2</y2>
    <z2>7.2</z2>
    <c1>S</c1>
    <R>8.2</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>b2</id>
    <x1>4.1</x1>
    <y1>5.1</y1>
    <z1>2.1</z1>
    <x2>7.1</x2>
    <y2>8.1</y2>
    <z2>9.1</z2>
    <c1>S</c1>
    <R>2.5</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>b2</id>
    <x1>3.6</x1>
    <y1>4.5</y1>
    <z1>5.1</z1>
    <x2>6.3</x2>
    <y2>7.4</y2>
    <z2>8.2</z2>
    <c1>G</c1>
    <R>3.1</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>c2</id>
    <x1>6.1</x1>
    <y1>7.1</y1>
    <z1>8.1</z1>
    <x2>9.1</x2>
    <y2>2.1</y2>
    <z2>11.1</z2>
    <c1>S</c1>
    <R>3.2</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
  <surfaces>
   <surface>
    <id>c1</id>
    <x1>1.5</x1>
    <y1>1.5</y1>
    <z1>1.5</z1>
    <x2>1.5</x2>
    <y2>1.5</y2>
    <z2>1.5</z2>
    <c1>A</c1>
    <R>1.5</R>
   </surface>
  </surfaces>
 </Roughness-Profile>
</Roughness-Profiles>

所需的输出应为:

<?xml version="1.0" encoding="UTF-8"?>
<R-Profiles xmlns="http://WKI/R-Profiles/1">
 <R-Profile>
  <id>a1</id>
  <surfaces>
   <surface>
    <x1>1.3</x1>
    <y1>2.1</y1>
    <z1>3.6</z1>
    <x2>4.5</x2>
    <y2>5.1</y2>
    <z2>6.8</z2>
    <c1>B</c1>
    <R>7.3</R>
   </surface>
   <surface>
    <x1>2.2</x1>
    <y1>3.2</y1>
    <z1>4.2</z1>
    <x2>5.2</x2>
    <y2>6.2</y2>
    <z2>7.2</z2>
    <c1>S</c1>
    <R>8.2</R>
   </surface>
    </surfaces>
 </R-Profile>
 <R-Profile>
  <id>b2</id>
  <surfaces>
   <surface>
    <x1>1.1</x1>
    <y1>2.1</y1>
    <z1>3.1</z1>
    <x2>4.1</x2>
    <y2>5.1</y2>
    <z2>6.1</z2>
    <c1>G</c1>
    <R>7.1</R>
   </surface>
   <surface>
    <x1>4.1</x1>
    <y1>5.1</y1>
    <z1>2.1</z1>
    <x2>7.1</x2>
    <y2>8.1</y2>
    <z2>9.1</z2>
    <c1>S</c1>
    <R>2.5</R>
   </surface>
   <surface>
    <x1>3.6</x1>
    <y1>4.5</y1>
    <z1>5.1</z1>
    <x2>6.3</x2>
    <y2>7.4</y2>
    <z2>8.2</z2>
    <c1>G</c1>
    <R>3.1</R>
   </surface>
    </surfaces>
 </R-Profile>
 <R-Profile>
  <id>c1</id>
  <surfaces>
   <surface>
    <x1>2.1</x1>
    <y1>3.1</y1>
    <z1>4.1</z1>
    <x2>5.1</x2>
    <y2>2.1</y2>
    <z2>7.1</z2>
    <c1>G</c1>
    <R>8.1</R>
   </surface>
   <surface>
    <x1>1.5</x1>
    <y1>1.5</y1>
    <z1>1.5</z1>
    <x2>1.5</x2>
    <y2>1.5</y2>
    <z2>1.5</z2>
    <c1>A</c1>
    <R>1.5</R>
   </surface>
   </surfaces>
 </R-Profile>
 <R-Profile>
  <id>c2</id>
  <surfaces>
   <surface>
    <x1>6.1</x1>
    <y1>7.1</y1>
    <z1>8.1</z1>
    <x2>9.1</x2>
    <y2>2.1</y2>
    <z2>11.1</z2>
    <c1>S</c1>
    <R>3.2</R>
   </surface>
  </surfaces>
 </R-Profile>
</R-Profiles>

3 个答案:

答案 0 :(得分:1)

我会做类似于@Parfait建议的操作;使用csv.DictReader和lxml创建XML。

但是,该答案缺少一些内容;surface 元素未按 id 分组。

如果我需要在转换期间对XML进行分组,那么我想到的第一件事就是XSLT。

一旦掌握了窍门,使用XSLT即可轻松进行分组;尤其是2.0或更高版本。不幸的是,lxml仅支持XSLT 1.0。在1.0中,您需要使用Muenchian Grouping

这是创建中间XML并使用XSLT进行转换的完整示例。

CSV输入(test.csv)

id

XSLT 1.0 (test.xsl)

id,x1,y1,z1,x2,y2,z2,c1,R
a1,1.3,2.1,3.6,4.5,5.1,6.8,B,7.3
b2,1.1,2.1,3.1,4.1,5.1,6.1,G,7.1
c1,2.1,3.1,4.1,5.1,2.1,7.1,G,8.1
a1,2.2,3.2,4.2,5.2,6.2,7.2,S,8.2
b2,4.1,5.1,2.1,7.1,8.1,9.1,S,2.5
b2,3.6,4.5,5.1,6.3,7.4,8.2,G,3.1
c2,6.1,7.1,8.1,9.1,2.1,11.1,S,3.2
c1,1.5,1.5,1.5,1.5,1.5,1.5,A,1.5

Python

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:rp="http://WKI/Roughness-Profiles/1">
  <!-- Regroups the rp:surface children of the document element by their
       rp:id value (Muenchian grouping): one Roughness-Profile element is
       produced per distinct id, holding that id and a surfaces wrapper
       with every surface that carries it. -->
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Index every rp:surface element by the value of its rp:id child. -->
  <xsl:key name="surface" match="rp:surface" use="rp:id"/>

  <!-- Identity template: copy attributes and nodes as-is unless a more
       specific template below matches. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Document element: copy it and its attributes, then emit the groups. -->
  <xsl:template match="/*">
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <!-- Select only the surfaces that are the first entry returned by the
           key for their own id, i.e. one representative per distinct id. -->
      <xsl:for-each select="rp:surface[count(.|key('surface',rp:id)[1])=1]">
        <xsl:element name="Roughness-Profile" namespace="http://WKI/Roughness-Profiles/1">
          <!-- The id is copied once per group, ahead of the surfaces wrapper. -->
          <xsl:copy-of select="rp:id"/>
          <xsl:element name="surfaces" namespace="http://WKI/Roughness-Profiles/1">
            <!-- Process every surface that shares this id. -->
            <xsl:apply-templates select="key('surface',rp:id)"/>
          </xsl:element>
        </xsl:element>
      </xsl:for-each>
    </xsl:copy>
  </xsl:template>

  <!-- Empty template: rp:id produces no output when templates are applied
       to it, so it is dropped from the surfaces copied by the identity
       template. -->
  <xsl:template match="rp:id"/>

</xsl:stylesheet>

XML输出(test.xml)

import csv
import lxml.etree as etree

# Default namespace passed to the root and to every element created below.
nsmap = {None: "http://WKI/Roughness-Profiles/1"}
root = etree.Element('Roughness-Profiles', nsmap=nsmap)

# Build a flat intermediate tree: one <surface> child of the root per CSV
# row, holding one sub-element per column (the id column included).
with open("test.csv") as f:
    for record in csv.DictReader(f):
        surface_elem = etree.SubElement(root, "surface", nsmap=nsmap)
        for column, cell in record.items():
            field = etree.SubElement(surface_elem, column.strip(), nsmap=nsmap)
            field.text = str(cell)

# Load the stylesheet and turn it into a callable transformer.
transform = etree.XSLT(etree.parse("test.xsl"))

# The tree is serialized and re-parsed before being transformed; per the
# original author's note this round trip is what makes the namespaces in
# the XSLT work the way they are supposed to.
reparsed_root = etree.fromstring(etree.tostring(root))
final_xml = transform(reparsed_root)

# Write the transformation result to disk.
final_xml.write_output("test.xml")

答案 1 :(得分:0)

首先从CSV中读取所有行并对它们进行排序。

之后,可以使用变量previous_id,仅当新行中的id与上一行不同时,才打开和关闭Roughness-Profile / surfaces。

我用StringIO模拟了csv文件,并用sys.stdout模拟了xml文件-这样每个人都可以复制并运行代码以查看其工作原理

text = '''id,x1,y1,z1,x2,y2,z2,c1,R
a1,1.3,2.1,3.6,4.5,5.1,6.8,B,7.3
b2,1.1,2.1,3.1,4.1,5.1,6.1,G,7.1
c1,2.1,3.1,4.1,5.1,2.1,7.1,G,8.1
a1,2.2,3.2,4.2,5.2,6.2,7.2,S,8.2
b2,4.1,5.1,2.1,7.1,8.1,9.1,S,2.5
b2,3.6,4.5,5.1,6.3,7.4,8.2,G,3.1
c2,6.1,7.1,8.1,9.1,2.1,11.1,S,3.2
c1,1.5,1.5,1.5,1.5,1.5,1.5,A,1.5'''

from io import StringIO
from xml.sax.saxutils import escape
import csv
import sys

# The CSV input is simulated with StringIO and the XML output with
# sys.stdout so the example can be copied and run as-is; replace both with
# open(...) handles to work on real files.
csvData = csv.reader(StringIO(text))
xmlData = sys.stdout

# Read everything up front so the rows can be grouped by id.
csvData = list(csvData)
tags = [item.replace(' ', '_') for item in csvData[0]] if csvData else []  # headers

# Sort on the id column only. sorted() is stable, so rows sharing an id keep
# their original CSV order; sorting whole rows would also reorder the
# surfaces inside each group by their values. Blank rows are dropped because
# they have no id to group on.
csvData = sorted((row for row in csvData[1:] if row), key=lambda row: row[0])

xmlData.write('<?xml version="1.0" encoding="UTF-8"?>\n<Roughness-Profiles xmlns="http://WKI/Roughness-Profiles/1">\n')

# id of the group that is currently open; None until the first group starts
previous_id = None

for row in csvData:
    row_id = row[0]
    if row_id != previous_id:
        # close previous group - but only if it is not first group
        if previous_id is not None:
            xmlData.write('</surfaces>\n</Roughness-Profile>\n')
        # open new group
        xmlData.write('<Roughness-Profile>\n<id>{}</id>\n<surfaces>\n'.format(escape(row_id)))
        # remember new group's id
        previous_id = row_id

    # surface: every column except the id, which was written once per group
    xmlData.write('<surface>\n')
    for value, tag in zip(row[1:], tags[1:]):
        # escape() keeps &, < and > in the data from breaking the markup
        xmlData.write('<{}>{}</{}>\n'.format(tag, escape(value), tag))
    xmlData.write('</surface>\n')

# close last group - only if at least one group was opened
if previous_id is not None:
    xmlData.write('</surfaces>\n</Roughness-Profile>\n')
xmlData.write('</Roughness-Profiles>\n')

没有StringIO和sys.stdout的版本

import csv

from xml.sax.saxutils import escape

csvFile = r'C:\Users\Desktop\test XML\csvfile.csv'
xmlFile = r'C:\Users\Desktop\test XML\myData.xml'

# Read all rows up front so they can be grouped by id. The file is opened
# with newline='' as the csv module documents for file objects, and the
# with-block closes the handle once the rows are in memory.
with open(csvFile, newline='') as csvHandle:
    csvData = list(csv.reader(csvHandle))

tags = [item.replace(' ', '_') for item in csvData[0]] if csvData else []  # headers

# Sort on the id column only. sorted() is stable, so rows sharing an id keep
# their original CSV order; sorting whole rows would also reorder the
# surfaces inside each group by their values. Blank rows are dropped because
# they have no id to group on.
csvData = sorted((row for row in csvData[1:] if row), key=lambda row: row[0])

# The output is encoded as UTF-8 so it agrees with the encoding declared in
# the XML prolog written below.
with open(xmlFile, 'w', encoding='utf-8') as xmlData:
    xmlData.write('<?xml version="1.0" encoding="UTF-8"?>\n<Roughness-Profiles xmlns="http://WKI/Roughness-Profiles/1">\n')

    # id of the group that is currently open; None until the first group starts
    previous_id = None

    for row in csvData:
        row_id = row[0]
        if row_id != previous_id:
            # close previous group - but only if it is not first group
            if previous_id is not None:
                xmlData.write('</surfaces>\n</Roughness-Profile>\n')
            # open new group
            xmlData.write('<Roughness-Profile>\n<id>{}</id>\n<surfaces>\n'.format(escape(row_id)))
            # remember new group's id
            previous_id = row_id

        # surface: every column except the id, which was written once per group
        xmlData.write('<surface>\n')
        for value, tag in zip(row[1:], tags[1:]):
            # escape() keeps &, < and > in the data from breaking the markup
            xmlData.write('<{}>{}</{}>\n'.format(tag, escape(value), tag))
        xmlData.write('</surface>\n')

    # close last group - only if at least one group was opened
    if previous_id is not None:
        xmlData.write('</surfaces>\n</Roughness-Profile>\n')
    xmlData.write('</Roughness-Profiles>\n')

答案 2 :(得分:0)

由于XML文件不是普通的文本文件,而是遵循W3C规范(W3C specifications)的特殊的基于文本的结构化数据文档,因此应避免通过字符串连接来构建文档。

实际上,应使用几乎所有现代编程语言(包括Python)及其内置xml.etree或更强大的第三方模块lxml中可用的适当DOM库。实际上,由于所需的输出涉及按 id 对节点进行分组,因此请考虑运行XSLT,这是一种专用于转换XML文件的专用语言。 lxml模块可以运行XSLT 1.0脚本。

下面使用内置csv模块的DictReader构建嵌套的id字典(所有列都分组在id键下)。然后,通过遍历此字典的内容以将数据写入元素节点来构建XML。

import csv
from collections import OrderedDict
import lxml.etree as ET

# Group the CSV rows by id: every id maps to a list with one ordered
# mapping per row, holding all of that row's columns except the id itself.
id_dct = OrderedDict()
with open("Input.csv") as f:
    for record in csv.DictReader(f):
        surface = OrderedDict({name: cell for name, cell in record.items() if name != "id"})
        id_dct.setdefault(record["id"], []).append(surface)

# Root element, declaring the default namespace.
root = ET.Element('R-Profiles', nsmap={None: "http://WKI/Roughness-Profiles/1"})

# One <R-Profile> per id: the id first, then a <surfaces> wrapper with one
# <surface> per grouped row and one child element per remaining column.
for profile_id, surfaces in id_dct.items():
    rpNode = ET.SubElement(root, "R-Profile")
    ET.SubElement(rpNode, "id").text = str(profile_id)
    surfacesNode = ET.SubElement(rpNode, "surfaces")

    for surface in surfaces:
        surfaceNode = ET.SubElement(surfacesNode, "surface")
        for name, cell in surface.items():
            ET.SubElement(surfaceNode, name).text = str(cell)

# Serialize to bytes with an XML declaration and write them out in binary mode.
tree_out = ET.tostring(root, pretty_print=True, xml_declaration=True, encoding="UTF-8")

with open('Output.xml', 'wb') as f:
    f.write(tree_out)

Input.csv

id,x1,y1,z1,x2,y2,z2,c1,R
a1,1.3,2.1,3.6,4.5,5.1,6.8,B,7.3
b2,1.1,2.1,3.1,4.1,5.1,6.1,G,7.1
c1,2.1,3.1,4.1,5.1,2.1,7.1,G,8.1
a1,2.2,3.2,4.2,5.2,6.2,7.2,S,8.2
b2,4.1,5.1,2.1,7.1,8.1,9.1,S,2.5
b2,3.6,4.5,5.1,6.3,7.4,8.2,G,3.1
c2,6.1,7.1,8.1,9.1,2.1,11.1,S,3.2
c1,1.5,1.5,1.5,1.5,1.5,1.5,A,1.5

Output.xml

<?xml version='1.0' encoding='UTF-8'?>
<R-Profiles xmlns="http://WKI/Roughness-Profiles/1">
  <R-Profile>
    <id>a1</id>
    <surfaces>
      <surface>
        <x1>1.3</x1>
        <y1>2.1</y1>
        <z1>3.6</z1>
        <x2>4.5</x2>
        <y2>5.1</y2>
        <z2>6.8</z2>
        <c1>B</c1>
        <R>7.3</R>
      </surface>
      <surface>
        <x1>2.2</x1>
        <y1>3.2</y1>
        <z1>4.2</z1>
        <x2>5.2</x2>
        <y2>6.2</y2>
        <z2>7.2</z2>
        <c1>S</c1>
        <R>8.2</R>
      </surface>
    </surfaces>
  </R-Profile>
  <R-Profile>
    <id>b2</id>
    <surfaces>
      <surface>
        <x1>1.1</x1>
        <y1>2.1</y1>
        <z1>3.1</z1>
        <x2>4.1</x2>
        <y2>5.1</y2>
        <z2>6.1</z2>
        <c1>G</c1>
        <R>7.1</R>
      </surface>
      <surface>
        <x1>4.1</x1>
        <y1>5.1</y1>
        <z1>2.1</z1>
        <x2>7.1</x2>
        <y2>8.1</y2>
        <z2>9.1</z2>
        <c1>S</c1>
        <R>2.5</R>
      </surface>
      <surface>
        <x1>3.6</x1>
        <y1>4.5</y1>
        <z1>5.1</z1>
        <x2>6.3</x2>
        <y2>7.4</y2>
        <z2>8.2</z2>
        <c1>G</c1>
        <R>3.1</R>
      </surface>
    </surfaces>
  </R-Profile>
  <R-Profile>
    <id>c1</id>
    <surfaces>
      <surface>
        <x1>2.1</x1>
        <y1>3.1</y1>
        <z1>4.1</z1>
        <x2>5.1</x2>
        <y2>2.1</y2>
        <z2>7.1</z2>
        <c1>G</c1>
        <R>8.1</R>
      </surface>
      <surface>
        <x1>1.5</x1>
        <y1>1.5</y1>
        <z1>1.5</z1>
        <x2>1.5</x2>
        <y2>1.5</y2>
        <z2>1.5</z2>
        <c1>A</c1>
        <R>1.5</R>
      </surface>
    </surfaces>
  </R-Profile>
  <R-Profile>
    <id>c2</id>
    <surfaces>
      <surface>
        <x1>6.1</x1>
        <y1>7.1</y1>
        <z1>8.1</z1>
        <x2>9.1</x2>
        <y2>2.1</y2>
        <z2>11.1</z2>
        <c1>S</c1>
        <R>3.2</R>
      </surface>
    </surfaces>
  </R-Profile>
</R-Profiles>