重新格式化xml文件

时间:2016-05-01 19:21:02

标签: xml bash xslt awk

我有以下xml文件:

<?xml version="1.0" encoding="UTF-8"?>
    <Data xmlns="http://www.test.com">
        <form Name="Group1">
            <Period Time="19042016T08:35:00"/>
            <Type c="1">Load</Type>
            <Type c="2">Memory</Type>
            <Type c="3">CPU</Type>
            <Type c="4">Task</Type>
            <Value Item="M-1">
                <val c="1">2979</val>
                <val c="2">0</val>
                <val c="3">9599</val>
                <val c="4">0</val>
            </Value>
            <Value Item="M-2">
                <val c="1">2973</val>
                <val c="2">0</val>
                <val c="3">0</val>
                <val c="4">0</val>
            </Value>
            <Value Item="M-3">
                <val c="1">2985</val>
                <val c="2">11889</val>
                <val c="3">0</val>
                <val c="4">0</val>
            </Value>
            <Value Item="M-4">
                <val c="1">28</val>
                <val c="2">0</val>
                <val c="3">0</val>
                <val c="4">2980</val>
            </Value>
        </form>
        <form Name="Group2">
            <Period Time="19042016T08:35:00"/>
            <Type c="1">Process</Type>
            <Type c="2">User</Type>
            <Type c="3">command</Type>
            <Type c="4">priority</Type>
            <Value Item="M-1">
                <val c="1">0</val>
                <val c="2">0</val>
                <val c="3">0</val>
                <val c="4">4477</val>
            </Value>
            <Value Item="M-2">
                <val c="1">0</val>
                <val c="2">0</val>
                <val c="3">0</val>
                <val c="4">4540</val>
            </Value>
            <Value Item="M-3">
                <val c="1">0</val>
                <val c="2">0</val>
                <val c="3">0</val>
                <val c="4">4526</val>
            </Value>
            <Value Item="M-4">
                <val c="1">0</val>
                <val c="2">0</val>
                <val c="3">0</val>
                <val c="4">4445</val>
            </Value>
        </form>
    </Data>

我希望得到如下输出:

Group=Group1, Time=19042016T08:35:00    
             M-1        M-2         M-3         M-4     
Load        2979       2973        2985          28
Memory         0          0       11889           0
CPU         9599          0           0           0
Task           0          0           0        2980

Group=Group2, Time=19042016T08:35:00    
             M-1        M-2         M-3         M-4     
Process        0          0           0           0
User           0          0           0           0
command        0          0           0           0
priority    4477       4540        4526        4445  

通过查看此处建议的一些xsltproc示例,我想出了以下样式表。

<?xml version="1.0" ?>
<!--
  Flattens every t:form of the input into plain "key=value" text lines:
  the group header first, then one line per counter name (t:Type), then
  each machine (t:Value) followed by one line per counter value (t:val).
  Nothing is arranged into columns; the output is a simple top-to-bottom dump.
-->
<xsl:stylesheet version="1.0" 
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:t="http://www.test.com">
    <!-- Emit plain text rather than XML. -->
    <xsl:output method="text"/>
    <!-- Drop whitespace-only text nodes from the input document. -->
    <xsl:strip-space elements="*"/>
    <!-- The t: prefix is bound to the namespace declared on the input's Data element. -->
    <xsl:template match="/t:Data">
        <xsl:for-each select="t:form">
            <!-- Group header line; &#xA; is a line feed. -->
            <xsl:value-of select="concat('Group=', @Name, ', Time=',t:Period/@Time,'&#xA;')"/>
            <!-- One line per counter name. -->
            <xsl:for-each select="t:Type">
                <xsl:value-of select="concat('CounterName=', text(), '&#xA;')"/>
            </xsl:for-each>
            <!-- One line per machine, each followed by its counter values in document order. -->
            <xsl:for-each select="t:Value">
                <xsl:value-of select="concat('Machine=', @Item, '&#xA;')"/>
                <xsl:for-each select="t:val">
                    <xsl:value-of select="concat('CounterValue=', text(), '&#xA;')"/>
                </xsl:for-each>
            </xsl:for-each>
        </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

然后结果如下:

xsltproc.exe tx.xsl data.xml 
Group=Group1, Time=19042016T08:35:00
CounterName=Load
CounterName=Memory
CounterName=CPU
CounterName=Task
Machine=M-1
CounterValue=2979
CounterValue=0
CounterValue=9599
CounterValue=0
Machine=M-2
CounterValue=2973
CounterValue=0
CounterValue=0
CounterValue=0
Machine=M-3
CounterValue=2985
CounterValue=11889
CounterValue=0
CounterValue=0
Machine=M-4
CounterValue=28
CounterValue=0
CounterValue=0
CounterValue=2980
Group=Group2, Time=19042016T08:35:00
CounterName=Process
CounterName=User
CounterName=command
CounterName=priority
Machine=M-1
CounterValue=0
CounterValue=0
CounterValue=0
CounterValue=4477
Machine=M-2
CounterValue=0
CounterValue=0
CounterValue=0
CounterValue=4540
Machine=M-3
CounterValue=0
CounterValue=0
CounterValue=0
CounterValue=4526
Machine=M-4
CounterValue=0
CounterValue=0
CounterValue=0
CounterValue=4445

有没有人知道如何把xsltproc的结果转换成上面所需的输出?是否可以使用不同的样式表来完成,还是需要在当前样式表的结果之后再应用后处理脚本?

非常感谢。

2 个答案:

答案 0 :(得分:2)

假设您想要为每个form制作以制表符分隔的“表格”,请尝试以下样式表:

XSLT 1.0

<!--
  Renders every t:form of the input as a tab-separated text table:
  a "Name=..., Time=..." title, a header row with one column per t:Value
  (its Item attribute), then one row per t:Type holding that counter's
  value for each t:Value.
-->
<xsl:stylesheet version="1.0" 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:t="http://www.test.com">
<!-- Emit plain text rather than XML. -->
<xsl:output method="text"/>

<!-- The t: prefix is bound to the namespace declared on the input's Data element. -->
<xsl:template match="/t:Data">
    <xsl:for-each select="t:form">
        <!-- table header -->
        <xsl:text>Name=</xsl:text>
        <xsl:value-of select="@Name"/>
        <xsl:text>, Time=</xsl:text>
        <xsl:value-of select="t:Period/@Time"/>
        <!-- two line feeds: end the title line and leave one blank line -->
        <xsl:text>&#10;&#10;</xsl:text>
        <!-- column headers: each is preceded by a tab (&#9;), so the first cell stays empty above the row headers -->
        <xsl:for-each select="t:Value">
            <xsl:text>&#9;</xsl:text>
            <xsl:value-of select="@Item"/>
        </xsl:for-each>
        <xsl:text>&#10;</xsl:text>
        <!-- rows -->
        <xsl:for-each select="t:Type">
            <!-- remember this Type's position; it is used below to pick the val at the same position -->
            <xsl:variable name="i" select="position()" />
            <!-- row header -->
            <xsl:value-of select="."/>
            <!-- row values: for every Value of the same form, the $i-th val child (matched by position, not by the c attribute) -->
            <xsl:for-each select="../t:Value">
                <xsl:text>&#9;</xsl:text>
                <xsl:value-of select="t:val[$i]"/>
            </xsl:for-each>
            <xsl:text>&#10;</xsl:text>
        </xsl:for-each>
        <!-- blank line between tables -->
        <xsl:text>&#10;</xsl:text>
    </xsl:for-each>
</xsl:template>

</xsl:stylesheet>

应用于示例输入的结果将是:

Name=Group1, Time=19042016T08:35:00

    M-1 M-2 M-3 M-4
Load    2979    2973    2985    28
Memory  0   0   11889   0
CPU 9599    0   0   0
Task    0   0   0   2980

Name=Group2, Time=19042016T08:35:00

    M-1 M-2 M-3 M-4
Process 0   0   0   0
User    0   0   0   0
command 0   0   0   0
priority    4477    4540    4526    4445

使用适当的制表符间距(此处:每个制表符10个空格)后,将如下所示:

enter image description here

答案 1 :(得分:0)

Esta es una solución dinámica en Python. Se adapta a cualquier número de 'M' y a cualquier número de 'Items'.

import xmltodict
import sys

def as_list(node):
    """Return *node* as a list.

    xmltodict produces a plain dict (or string) for an element that occurs
    once and a list only when the element repeats, so callers that iterate
    or index must normalize first. ``None`` (element absent) becomes ``[]``.
    """
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


def node_text(node):
    """Return the text of a parsed element, or ``''`` when it has none.

    An element with attributes is parsed as a dict whose text sits under
    ``'#text'``; an element without attributes is parsed as a bare string.
    """
    if isinstance(node, dict):
        return node.get('#text') or ''
    return node or ''


def format_form(form, tab_size=10):
    """Build the text table for one parsed ``<form>`` element.

    Args:
        form: the xmltodict mapping of a ``<form>``: ``'@Name'``,
            ``'Period'`` (with ``'@Time'``), ``'Type'`` entries (row labels)
            and ``'Value'`` entries (columns, each with ``'@Item'`` and
            ``'val'`` children).
        tab_size: column width used when expanding tabs to spaces.

    Returns:
        A list of lines: the ``Name=..., Time=...`` title, the column header
        row, then one row per ``Type``. The n-th ``val`` of every ``Value``
        belongs to the n-th ``Type``; a missing ``val`` yields an empty cell.
    """
    machines = as_list(form.get('Value'))
    # Normalize each machine's values once instead of once per row.
    values_per_machine = [as_list(machine.get('val')) for machine in machines]

    title = 'Name=' + form['@Name'] + ', Time=' + form['Period']['@Time']
    # Every column label is preceded by a tab so the first cell stays empty.
    column_row = ''.join('\t' + machine['@Item'] for machine in machines)
    lines = [title, column_row.expandtabs(tab_size)]

    for row, counter in enumerate(as_list(form.get('Type'))):
        # Walk across the machines for this counter: one row per Type,
        # one cell per Value (not the other way round).
        cells = [
            node_text(vals[row]) if row < len(vals) else ''
            for vals in values_per_machine
        ]
        lines.append('\t'.join([node_text(counter)] + cells).expandtabs(tab_size))
    return lines


def main(xml_path='file.xml', out_path='out.txt'):
    """Convert the XML report at *xml_path* into text tables in *out_path*.

    Every ``<form>`` under ``<Data>`` becomes one table; tables are separated
    by a blank line.
    """
    # Read bytes so the XML parser honours the encoding named in the prolog.
    with open(xml_path, 'rb') as fd:
        doc = xmltodict.parse(fd.read())  # whole document as nested dicts

    tables = ['\n'.join(format_form(form))
              for form in as_list(doc['Data'].get('form'))]

    # Write the file directly instead of swapping sys.stdout, so the handle
    # is closed even if something fails and stdout is never left redirected.
    with open(out_path, 'w', encoding='utf-8') as out:
        out.write('\n\n'.join(tables) + '\n')


if __name__ == '__main__':
    main()

Lo tienes que ejecutar así:

$ python3 nombre_script.py

Salida:

$ python3 parser.py

Name=Group1, Time=19042016T08:35:00
          M-1       M-2       M-3       M-4
Load      2979      2973      2985      28
Memory    0         0         11889     0
CPU       9599      0         0         0
Task      0         0         0         2980

Name=Group2, Time=19042016T08:35:00
          M-1       M-2       M-3       M-4
Process   0         0         0         0
User      0         0         0         0
command   0         0         0         0
priority  4477      4540      4526      4445

Lo que hace xmltodict es convertir el contenido del fichero en un diccionario (lo que en otros lenguajes se llama array asociativo).