我正在编写一个应用程序,它从各种源解析动态xml并遍历XML并返回所有唯一元素。
鉴于Xml文件有时非常大,我使用XmlReader来解析由于内存限制而导致的Xml结构。
public IDictionary<string, int> Discover(string filePath)
{
Dictionary<string, string> nodeTable = new Dictionary<string, string>();
using (XmlReader reader = XmlReader.Create(filePath))
{
while (!reader.EOF)
{
if (reader.NodeType == XmlNodeType.Element)
{
if (!nodeTable.ContainsKey(reader.LocalName))
{
nodeTable.Add(reader.LocalName, reader.Depth);
}
}
reader.Read();
}
}
Debug.WriteLine("The node table has {0} items.", nodeTable.Count);
return nodeTable;
}
这是一种享受,并且性能良好且高效,但是最后一部分难以理解,我正在尝试为每个元素生成XPath。
现在,这起初似乎很容易使用这样的东西。
var elements = new Stack<string>();
while (reader.Read())
{
switch (reader.NodeType)
{
case XmlNodeType.Element:
elements.Push(reader.LocalName);
break;
case XmlNodeType.EndElement:
elements.Pop();
break;
case XmlNodeType.Text:
path = string.Join("/", elements.Reverse());
break;
}
}
但这只能给我一部分解决方案。鉴于我希望为树中包含数据的每个节点返回XPath,并检测给定节点树是否包含嵌套的数据集。
即
<customers>
<customer id=2>
<name>ted smith</name>
<addresses>
<address1>
<line1></line1>
</address1>
<address2>
<line1></line1>
<line2></line2>
</address2>
</addresses>
</customer>
<customer id=322>
<name>smith mcsmith</name>
<addresses>
<address1>
<line1></line1>
<line2></line2>
</address1>
<address2>
<line1></line1>
<line2></line2>
</address2>
</addresses>
</customer>
</customers>
请记住,数据是完全动态的,架构是未知的。
所以输出应该包括
/customer/name
/customer/address1/line1
/customer/address1/line2
/customer/address2/line1
/customer/address2/line2
答案 0 :(得分:1)
我喜欢使用递归方法而不是push / pop。见下面的代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.IO;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string input =
"<customers>" +
"<customer id=\"2\">" +
"<name>ted smith</name>" +
"<addresses>" +
"<address1>" +
"<line1></line1>" +
"</address1>" +
"<address2>" +
"<line1></line1>" +
"<line2></line2>" +
"</address2>" +
"</addresses>" +
"</customer>" +
"<customer id=\"322\">" +
"<name>smith mcsmith</name>" +
"<addresses>" +
"<address1>" +
"<line1></line1>" +
"<line2></line2>" +
"</address1>" +
"<address2>" +
"<line1></line1>" +
"<line2></line2>" +
"</address2>" +
"</addresses>" +
"</customer>" +
"</customers>";
StringReader sReader = new StringReader(input);
XmlReader reader = XmlReader.Create(sReader);
Node root = new Node();
ReadNode(reader, root);
}
static bool ReadNode(XmlReader reader, Node node)
{
Boolean done = false;
Boolean endElement = false;
while(done = reader.Read())
{
switch (reader.NodeType)
{
case XmlNodeType.Element:
if (node.name.Length == 0)
{
node.name = reader.Name;
GetAttrubutes(reader, node);
}
else
{
Node newNode = new Node();
newNode.name = reader.Name;
if (node.children == null)
{
node.children = new List<Node>();
}
node.children.Add(newNode);
GetAttrubutes(reader, newNode);
done = ReadNode(reader, newNode);
}
break;
case XmlNodeType.EndElement:
endElement = true;
break;
case XmlNodeType.Text:
node.text = reader.Value;
break;
case XmlNodeType.Attribute:
if (node.attributes == null)
{
node.attributes = new Dictionary<string, string>();
}
node.attributes.Add(reader.Name, reader.Value);
break;
}
if (endElement)
break;
}
return done;
}
static void GetAttrubutes(XmlReader reader, Node node)
{
for (int i = 0; i < reader.AttributeCount; i++)
{
if (i == 0) node.attributes = new Dictionary<string, string>();
reader.MoveToNextAttribute();
node.attributes.Add(reader.Name, reader.Value);
}
}
}
public class Node
{
public string name = string.Empty;
public string text = string.Empty;
public Dictionary<string, string> attributes = null;
public List<Node> children = null;
}
}