Private Sub ReadXMLAttributes(ByVal oXML As String)
ReadXMLAttributes(oXML, "mso-infoPathSolution")
End Sub
Private Sub ReadXMLAttributes(ByVal oXML As String, ByVal oTagName As String)
Try
Dim XmlDoc As New Xml.XmlDocument
XmlDoc.LoadXml(oXML)
oFileInfo = New InfoPathDocument
Dim XmlNodes As Xml.XmlNodeList = XmlDoc.GetElementsByTagName(oTagName)
For Each xNode As Xml.XmlNode In XmlNodes
With xNode
oFileInfo.SolutionVersion = .Attributes(InfoPathSolution.solutionVersion).Value
oFileInfo.ProductVersion = .Attributes(InfoPathSolution.productVersion).Value
oFileInfo.PIVersion = .Attributes(InfoPathSolution.PIVersion).Value
oFileInfo.href = .Attributes(InfoPathSolution.href).Value
oFileInfo.name = .Attributes(InfoPathSolution.name).Value
End With
Next
Catch ex As Exception
MsgBox(ex.Message, MsgBoxStyle.OkOnly, "ReadXMLAttributes")
End Try
End Sub
这样可行,但如果重新排序属性,它仍会遇到下面的问题。我能想到避免这个问题的唯一方法是将属性名称硬编码到我的程序中,让它通过循环解析标记并搜索指定的标记来处理条目。
注意:InfoPathDocument是我制作的自定义类,它并不复杂:
Public Class InfoPathDocument
Private _sVersion As String
Private _pVersion As String
Private _piVersion As String
Private _href As String
Private _name As String
Public Property SolutionVersion() As String
Get
Return _sVersion
End Get
Set(ByVal value As String)
_sVersion = value
End Set
End Property
Public Property ProductVersion() As String
Get
Return _pVersion
End Get
Set(ByVal value As String)
_pVersion = value
End Set
End Property
Public Property PIVersion() As String
Get
Return _piVersion
End Get
Set(ByVal value As String)
_piVersion = value
End Set
End Property
Public Property href() As String
Get
Return _href
End Get
Set(ByVal value As String)
If value.ToLower.StartsWith("file:///") Then
value = value.Substring(8)
End If
_href = Form1.PathToUNC(URLDecode(value))
End Set
End Property
Public Property name() As String
Get
Return _name
End Get
Set(ByVal value As String)
_name = value
End Set
End Property
Sub New()
End Sub
Sub New(ByVal oSolutionVersion As String, ByVal oProductVersion As String, ByVal oPIVersion As String, ByVal oHref As String, ByVal oName As String)
SolutionVersion = oSolutionVersion
ProductVersion = oProductVersion
PIVersion = oPIVersion
href = oHref
name = oName
End Sub
Public Function URLDecode(ByVal StringToDecode As String) As String
Dim TempAns As String = String.Empty
Dim CurChr As Integer = 1
Dim oRet As String = String.Empty
Try
Do Until CurChr - 1 = Len(StringToDecode)
Select Case Mid(StringToDecode, CurChr, 1)
Case "+"
oRet &= " "
Case "%"
oRet &= Chr(Val("&h" & Mid(StringToDecode, CurChr + 1, 2)))
CurChr = CurChr + 2
Case Else
oRet &= Mid(StringToDecode, CurChr, 1)
End Select
CurChr += 1
Loop
Catch ex As Exception
MsgBox(ex.Message, MsgBoxStyle.OkOnly, "URLDecode")
End Try
Return oRet
End Function
End Class
我正在开发一个需要读取XML文档的项目,特别是Microsoft InfoPath中保存的表单。
以下是我将要使用的一个简单示例以及可能有用的一些背景信息:
<?xml version="1.0" encoding="UTF-8"?>
<?mso-infoPathSolution solutionVersion="1.0.0.2" productVersion="12.0.0" PIVersion="1.0.0.0" href="file:///C:\Users\darren\Desktop\simple_form.xsn" name="urn:schemas-microsoft-com:office:infopath:simple-form:-myXSD-2009-05-15T14-16-37" ?>
<?mso-application progid="InfoPath.Document" versionProgid="InfoPath.Document.2"?>
<my:myFields xmlns:my="http://schemas.microsoft.com/office/infopath/2003/myXSD/2009-05-15T14:16:37" xml:lang="en-us">
<my:first_name>John</my:first_name>
<my:last_name>Doe</my:last_name>
</my:myFields>
我现在的目标是提取表单的versionID和位置。正则表达式很容易:
Dim _doc As New XmlDocument
_doc.Load(_thefile)
Dim oRegex As String = "^solutionVersion=""(?<sVersion>[0-9.]*)"" productVersion=""(?<pVersion>[0-9.]*)"" PIVersion=""(?<piVersion>[0-9.]*)"" href=""(?<href>.*)"" name=""(?<name>.*)""$"
Dim rx As New Regex(oRegex), m As Match = Nothing
For Each section As XmlNode In _doc.ChildNodes
m = rx.Match(section.InnerText.Trim)
If m.Success Then
Dim temp As String = m.Groups("name").Value.Substring(m.Groups("name").Value.ToLower.IndexOf("infopath") + ("infopath").Length + 1)
fileName = temp.Substring(0, temp.LastIndexOf(":"))
fileVersion = m.Groups("sVersion").Value
End If
Next
这个工作解决方案带来的唯一问题是,如果InfoPath文档标题中的架构发生了变化......例如解决方案版本和产品版本属性交换位置(微软LOVES做这样的事情,似乎)。
所以我选择尝试使用VB.NET的XML解析功能来帮助我实现上述结果,sans-regex。
包含我需要的信息的ChildNode
对象中的_doc
,但它没有任何ChildNodes:
_doc.ChildNode(1).HasChildNodes = False
任何人都可以帮我解决这个问题吗?
答案 0 :(得分:1)
处理指令是XML文档的一部分,但它们的属性不会被解析。试试这段代码:
// Load the original xml...
var xml = new XmlDocument();
xml.Load( _thefile );
// Select out the processing instruction...
var infopathProcessingInstruction = xml.SelectSingleNode( "/processing-instruction()[local-name(.) = \"mso-infoPathSolution\"]" );
// Since the processing instruction does not expose it's attributes, create a new XML document...
var xmlInfoPath = new XmlDocument();
xmlInfoPath.LoadXml("<data " + infopathProcessingInstruction.InnerText + " />");
// Get the data...
var solutionVersion = xmlInfoPath.DocumentElement.GetAttribute("solutionVersion");
var productVersion = xmlInfoPath.DocumentElement.GetAttribute("productVersion");
答案 1 :(得分:0)
问题是您要解析的标签实际上不是XML文档的一部分。它们是包含处理指令的XML-Prolog。因此它们不会作为元素在XmlDocument中可用。
在剥离&lt ;?之后,我唯一的想法是(除了查看文档如何访问这些元素之外)将mso-infoPathSolution-element移动到它自己的XmlDocument中。 ?&GT;离开并用&lt;替换它们/取代。然后,您可以访问属性,无论其顺序如何。