使用CDATA进行VBScript XML缩进

时间:2014-07-07 13:44:22

标签: xml vbscript indentation

我想用VBScript生成XML文件。我发现了Microsoft.XMLDOM,但这个类似乎不知道如何对输出文件进行缩进。我尝试用MSXML2重新缩进我的XML,但这样做之后,我的CDATA部分消失了......

VBScript:

' Build a small in-memory document and save it twice: once directly through
' the DOM and once through the xmlSave helper defined further down.
set xml = CreateObject("Microsoft.XMLDOM")
' Prepend an "xml" processing instruction carrying the version and encoding.
set encoding = xml.createProcessingInstruction("xml", "version='1.0' encoding='ISO-8859-1'")
xml.insertBefore encoding, xml.childNodes.Item(0)
' Root element <foo foobar="42">.
set foo = xml.createElement("foo")
foo.setAttribute "foobar", "42"

' Child element <bar> whose only content is a CDATA section.
set bar = xml.createElement("bar")
set cdata = xml.createCDATASection("Hello World!")
bar.appendChild cdata
foo.appendChild bar
xml.appendChild(foo)

' XML okay but ugly because no indentation
xml.save("a.xml")

' XML pretty but the 'cdata' sections vanished...
xmlSave xml, "b.xml"

' Re-serializes the DOM document `xml` with indentation and writes it to
' `filename`, by feeding the document through a SAX reader into an
' MXXMLWriter whose output is an ADODB stream.
' NOTE(review): only the content and error handlers are attached to the
' reader here; no lexical handler is registered, which is presumably why
' the CDATA section markers are missing from the file this produces.
function xmlSave(xml, filename)
    set rdr = CreateObject("MSXML2.SAXXMLReader")
    set wrt = CreateObject("MSXML2.MXXMLWriter")
    Set oStream = CreateObject("ADODB.STREAM")
    oStream.Open
    oStream.Charset = "ISO-8859-1"

    ' Configure the writer: indented output, ISO-8859-1, written to the stream.
    wrt.indent = True
    wrt.encoding = "ISO-8859-1"
    wrt.output = oStream
    ' The writer receives the reader's content and error events.
    Set rdr.contentHandler = wrt
    Set rdr.errorHandler = wrt

    ' Replay the DOM document as SAX events, then flush the writer.
    rdr.Parse xml
    wrt.flush

    ' Save option 2 is adSaveCreateOverWrite in ADO: replace an existing file.
    oStream.SaveToFile filename, 2
end function

输出:

$ cscript //nologo test.vbs && cat a.xml && echo -e "------" && cat b.xml
<?xml version="1.0" encoding="ISO-8859-1"?>
<foo foobar="42"><bar><![CDATA[Hello World!]]></bar></foo>
------
<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
<foo foobar="42">
        <bar>Hello World!</bar>
</foo>

如何在不丢失CDATA部分的情况下轻松地使用XMLDOM获取漂亮的缩进XML?

2 个答案:

答案 0 :(得分:1)

我找到了一个可行的办法......

' Pretty-prints the DOM document `xmlDoc` and saves it to `filePath`.
' The document is replayed as SAX events into an MXXMLWriter that writes to
' an ADODB stream. The writer is registered as content, DTD, error, lexical
' and declaration handler on the reader. An existing file is overwritten.
Function ParseAndSave(filePath, xmlDoc)
    Const SAVE_CREATE_OVERWRITE = 2   ' ADO adSaveCreateOverWrite
    Dim saxReader, mxWriter, outStream

    Set mxWriter = CreateObject("MSXML2.MXXMLWriter")
    Set saxReader = CreateObject("MSXML2.SAXXMLReader")
    Set outStream = CreateObject("ADODB.STREAM")

    ' Target stream, opened first and then switched to the output charset.
    With outStream
        .Open
        .Charset = "ISO-8859-1"
    End With

    ' Writer settings, applied in the same order as before.
    With mxWriter
        .output = outStream
        .indent = True
        .standalone = True
        .encoding = "ISO-8859-1"
    End With

    ' Route every kind of SAX event from the reader into the writer.
    With saxReader
        Set .contentHandler = mxWriter
        Set .DTDHandler = mxWriter
        Set .errorHandler = mxWriter
        .putProperty "http://xml.org/sax/properties/lexical-handler", mxWriter
        .putProperty "http://xml.org/sax/properties/declaration-handler", mxWriter
        .parse xmlDoc
    End With
    mxWriter.flush

    outStream.SaveToFile filePath, SAVE_CREATE_OVERWRITE

    ' Release the stream and the COM objects.
    outStream.Close
    Set outStream = Nothing
    Set mxWriter = Nothing
    Set saxReader = Nothing
End Function

答案 1 :(得分:1)

我在代码中看到的第一个误解是认为 <?xml ...?> 是一条处理指令。事实并非如此,它是 XML 声明。您无法用 createProcessingInstruction() 生成它,尝试这样做会导致输出文档损坏。

下一个误解是XML必须看起来很整洁。或者说你需要CDATA。

这两点可能有点争议,但一般来说,无论是外观整洁的XML还是CDATA,都没有任何技术上的用途。如果您的强迫症允许,就别再纠结它们了。

第三个误解是认为“缩进”是除了只包含空白字符的文本节点之外的别的什么东西。XML会保留您的数据,而文本节点(无论是否为空白)就是数据。如果您不添加任何仅包含换行符和空格/制表符的文本节点,那么输出中就不会有这样的节点。

简而言之:如果要缩进节点,则必须手动添加缩进。这个过程通常被称为“美化输出”(pretty printing)。

您可以使用下面这样的递归函数来美化输出一个文档(要把这件事做“对”比人们想象的要复杂,我无法保证输出完全符合您的喜好)。

' Public entry point: pass a DOMDocument to it. Modifies that document
' in place by inserting whitespace text nodes around its elements/comments.
'
'   doc       - the DOMDocument to indent.
'   indentStr - one indentation unit, e.g. "  " or vbTab. Only its first
'               character and its length are used, so it should be a run
'               of one repeated character. An empty string yields output
'               with line breaks but no leading indentation.
Sub IndentDocument(doc, indentStr)
  Dim unitChar, unitWidth

  ' A document without a root element has nothing to indent.
  If doc.DocumentElement Is Nothing Then Exit Sub

  unitWidth = Len(indentStr)
  If unitWidth = 0 Then
    ' Left("", 1) would be "", which String() cannot repeat. Use a
    ' placeholder character; with a width of 0 it is never emitted.
    unitChar = " "
  Else
    unitChar = Left(indentStr, 1)
  End If

  IndentNode doc.DocumentElement, unitChar, unitWidth, 0
End Sub

' --------------------------------------------------------------------------
' helper functions, don't call directly...    
' Recursively indents `node` and its descendants by inserting whitespace
' text nodes. Only elements and comments (see CanIndent) are indented;
' text and CDATA children are left exactly where they are.
'
'   node       - node to process; Nothing is accepted and ignored.
'   indentChar - the single character used for indentation.
'   perLevel   - how many indentChar are emitted per nesting level.
'   level      - nesting depth of `node` (0 for the document element).
Sub IndentNode(node, indentChar, perLevel, level)
  Dim parent, doc, count, i
  Dim kids()

  ' An empty document has no DocumentElement; there is nothing to do.
  If node Is Nothing Then Exit Sub

  If node.NodeType = 9 Then
    ' NODE_DOCUMENT: continue with the root element at the same level.
    IndentNode node.DocumentElement, indentChar, perLevel, level
  ElseIf CanIndent(node) Then
    ' Drop any existing whitespace-only children so indentation is rebuilt.
    IndentRemove node
    Set doc = node.OwnerDocument
    If Not node Is doc.DocumentElement Then
      Set parent = node.ParentNode
      ' Line break + indentation before this node, unless it directly
      ' follows text/CDATA content (mixed content is not broken up).
      If node Is parent.FirstChild Or CanIndent(node.PreviousSibling) Then
        parent.InsertBefore doc.createTextNode(vbLf & String(level * perLevel, indentChar)), node
      End If
      ' After the last child, place the parent's closing tag on its own line.
      If node Is parent.LastChild Then
        parent.appendChild doc.createTextNode(vbLf & String((level - 1) * perLevel, indentChar))
      End If
    End If

    ' The recursive calls insert text nodes into node.ChildNodes, which is
    ' a live list. Take a snapshot first so every original child is
    ' visited exactly once regardless of how the list changes.
    count = node.ChildNodes.Length
    If count > 0 Then
      ReDim kids(count - 1)
      For i = 0 To count - 1
        Set kids(i) = node.ChildNodes(i)
      Next
      For i = 0 To count - 1
        IndentNode kids(i), indentChar, perLevel, level + 1
      Next
    End If
  End If
End Sub

' Returns True when `node` is an element or a comment, i.e. a node that
' gets its own indented line. Returns False for Nothing and for every
' other node type.
Function CanIndent(node)
  Const NODE_ELEMENT = 1
  Const NODE_COMMENT = 8

  CanIndent = False
  If node Is Nothing Then Exit Function

  Select Case node.NodeType
    Case NODE_ELEMENT, NODE_COMMENT
      CanIndent = True
  End Select
End Function

' Removes every direct child of `node` that is a text node consisting only
' of whitespace (spaces, tabs, carriage returns, line feeds). Text nodes
' with any other content, and all non-text children, are kept.
Sub IndentRemove(node)
  Dim child, i, remainder

  ' Walk backwards so removals do not shift the indexes still to be visited.
  For i = node.ChildNodes.Length To 1 Step -1
    Set child = node.ChildNodes(i - 1)
    If child.NodeType = 3 Then
      ' Trim() only strips spaces, so line breaks and tabs are removed
      ' explicitly; otherwise tab indentation would never be cleaned up.
      remainder = child.Text
      remainder = Replace(remainder, vbCr, "")
      remainder = Replace(remainder, vbLf, "")
      remainder = Replace(remainder, vbTab, "")
      If Trim(remainder) = "" Then
        node.RemoveChild child
      End If
    End If
    Set child = Nothing
  Next
End Sub

用法

' Example: build a document, indent it in place, then save it.
Set doc = CreateObject("MSXML2.DOMDocument")

' load skeleton XML document with pre-defined output encoding
' (the XML declaration and an empty <foo /> root come from this string)
doc.LoadXML "<?xml version=""1.0"" encoding=""ISO-8859-1""?><foo />"


' ... now create all kinds of nodes here ...


' indent document with two spaces and save
' (IndentDocument modifies doc in place; Save then writes it to foo.xml)
IndentDocument doc, "  "
doc.Save "foo.xml"

顺便说一句:如果您想对新创建的文件使用ISO-8859-1,请慎重考虑。如今UTF-8才是正确的选择,任何新内容都不应再使用遗留的文件编码。在XML中尤其如此,因为所有XML解析器都能理解UTF-8。