我有以下xml:
<?xml version="1.0" encoding="utf-8"?>
<parent>
<element href="www.something.com" title="First">
<element href="www.something.com" title="Second">
<element href="www.something.com" title="Third">
</element>
</element>
</element>
<element href="www.something.com" title="Some title"></element>
<element href="www.something.com" title="Another">
<element href="www.something.com" title="Extra">
<element href="www.something.com" title="Page">
<element href="www.something.com" title="Target">
</element>
</element>
</element>
</element>
</parent>
如何解析此 XML,并利用从最外层父元素到元素自身的 title 属性,为每个元素生成格式化的路径?
每个元素可以包含零个、一个或多个子元素。
示例:
<element href="www.something.com" title="First"> // path: First
<element href="www.something.com" title="Second"> // path: First / Second
<element href="www.something.com" title="Third"> // path: First / Second / Third
</element>
</element>
</element>
<element href="www.something.com" title="Some title"></element> // path: Some title
<element href="www.something.com" title="Another"> // path: Another
<element href="www.something.com" title="Extra"> // path: Another / Extra
<element href="www.something.com" title="Page"> // path: Another / Extra / Page
<element href="www.something.com" title="Target"> // path: Another / Extra / Page / Target
</element>
</element>
</element>
</element>
答案 0 :(得分:0)
使用LINQ to XML解析它,然后构建您需要的路径。对于任何给定元素:
// Walk from the outermost ancestor down to `element`, keeping the value of
// the "title" attribute of every element that has one.
var titles = element.AncestorsAndSelf()
    .Reverse()
    .Select(e => e.Attribute("title"))
    .Where(a => a != null)
    .Select(a => a.Value);
// Join the titles into a path such as "First / Second / Third".
var path = string.Join(" / ", titles);
答案 1 :(得分:0)
如果您希望生成基于预定义属性名称唯一指定XML文档中元素的XPath字符串,您可以根据这两个答案组合解决方案:
创建以下扩展方法:
/// <summary>
/// Extension methods that build XPath expressions for LINQ to XML elements,
/// identifying each element by the value of a chosen attribute when it has one.
/// </summary>
public static class XExtensions
{
    /// <summary>
    /// Builds an XPath expression leading from the outermost ancestor of
    /// <paramref name="element"/> down to the element itself.
    /// </summary>
    /// <param name="element">The element to locate.</param>
    /// <param name="attributeName">Name of the attribute used to identify each step.</param>
    /// <returns>The concatenated XPath steps, outermost ancestor first.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
    public static string GetAbsoluteXPathByAttribute(this XElement element, string attributeName)
    {
        Func<XElement, string> relativeXPath = e => RelativeXPathByAttribute(e, attributeName);
        return GetXPath(element, relativeXPath);
    }

    /// <summary>
    /// Builds the single XPath step for <paramref name="element"/>:
    /// an attribute test when the element carries the attribute, a
    /// local-name() test when the element is in a namespace, and the plain
    /// element name otherwise.
    /// </summary>
    static string RelativeXPathByAttribute(XElement element, string attributeName)
    {
        var attr = element.Attribute(attributeName);
        if (attr != null)
        {
            string value = attr.Value;
            string step = string.Format("*[@{0}={1}]", attributeName, XPathLiteral(value));
            // Position is counted among siblings carrying the same attribute value,
            // matching how the predicate above filters them.
            int index = IndexPosition(element, e =>
            {
                var a = e.Attribute(attributeName);
                return a != null && a.Value == value;
            });
            return WithPosition(step, index);
        }

        if (element.Name.Namespace != XNamespace.None)
        {
            // Namespaced elements are matched by local name so the expression
            // needs no namespace prefix mapping.
            string step = string.Format("*[local-name()={0}]", XPathLiteral(element.Name.LocalName));
            return WithPosition(step, IndexPosition(element, e => e.Name.LocalName == element.Name.LocalName));
        }

        return WithPosition(element.Name.LocalName, IndexPosition(element, e => e.Name == element.Name));
    }

    /// <summary>
    /// Turns a node test into a path step. An index of -1 (element without a
    /// parent element) leaves the step untouched; any other index yields
    /// "/step[index]".
    /// </summary>
    static string WithPosition(string step, int index)
    {
        if (index == -1)
            return step;
        return string.Format(NumberFormatInfo.InvariantInfo, "/{0}[{1}]", step, index);
    }

    /// <summary>
    /// Returns the 1-based position of <paramref name="element"/> among the
    /// siblings that share its name, or -1 when it has no parent element.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
    public static int IndexPosition(this XElement element)
    {
        return IndexPosition(element, e => e.Name == element.Name);
    }

    /// <summary>
    /// Returns the 1-based position of <paramref name="element"/> among the
    /// children of its parent accepted by <paramref name="isRelevant"/>,
    /// or -1 when it has no parent element.
    /// </summary>
    /// <param name="element">The element whose position is wanted.</param>
    /// <param name="isRelevant">Filter selecting the siblings that are counted.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The element is not among the filtered children of its parent.
    /// </exception>
    public static int IndexPosition(XElement element, Func<XElement, bool> isRelevant)
    {
        // Name the offending parameter, as GetXPath does.
        if (element == null)
            throw new ArgumentNullException("element");
        if (isRelevant == null)
            throw new ArgumentNullException("isRelevant");
        if (element.Parent == null)
            return -1;

        int i = 1; // Indexes for nodes start at 1, not 0
        foreach (var sibling in element.Parent.Elements().Where(isRelevant))
        {
            if (sibling == element)
                return i;
            i++;
        }
        throw new InvalidOperationException("element has been removed from its parent.");
    }

    /// <summary>
    /// Concatenates the steps produced by <paramref name="relativeXPath"/> for
    /// every ancestor (outermost first) followed by the step for the element.
    /// </summary>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    static string GetXPath(XElement element, Func<XElement, string> relativeXPath)
    {
        if (element == null)
            throw new ArgumentNullException("element");
        if (relativeXPath == null)
            throw new ArgumentNullException("relativeXPath");

        // Ancestors() yields the nearest ancestor first, so reverse it.
        var ancestorSteps = element.Ancestors().Select(relativeXPath).Reverse();
        return string.Concat(ancestorSteps.ToArray()) + relativeXPath(element);
    }

    /// <summary>
    /// Produce an XPath literal equal to the value if possible; if not, produce
    /// an XPath expression that will match the value.
    /// From https://stackoverflow.com/questions/1341847/special-character-in-xpath-query
    ///
    /// Note that this function will produce very long XPath expressions if a value
    /// contains a long run of double quotes.
    /// </summary>
    /// <param name="value">The value to match.</param>
    /// <returns>If the value contains only single or double quotes, an XPath
    /// literal equal to the value. If it contains both, an XPath expression,
    /// using concat(), that evaluates to the value.</returns>
    static string XPathLiteral(string value)
    {
        // Without a double quote the value fits in a double-quoted literal;
        // without a single quote it fits in a single-quoted one.
        if (!value.Contains("\""))
            return "\"" + value + "\"";
        if (!value.Contains("'"))
            return "'" + value + "'";

        // The value contains both kinds of quote: concatenate every
        // non-double-quote substring with the quotes, e.g.:
        //
        //    concat("foo", '"', "bar")
        var sb = new StringBuilder("concat(");
        string[] substrings = value.Split('"');
        bool hasArgument = false;
        for (int i = 0; i < substrings.Length; i++)
        {
            if (substrings[i].Length > 0)
            {
                if (hasArgument)
                    sb.Append(", ");
                sb.Append("\"").Append(substrings[i]).Append("\"");
                hasArgument = true;
            }
            // Every split boundary stands for one double quote of the value.
            if (i < substrings.Length - 1)
            {
                if (hasArgument)
                    sb.Append(", ");
                sb.Append("'\"'");
                hasArgument = true;
            }
        }
        sb.Append(")");
        return sb.ToString();
    }
}
然后,使用和不使用命名空间进行测试:
/// <summary>
/// Exercises GetAbsoluteXPathByAttribute against the sample document, once
/// without and once with a default XML namespace.
/// </summary>
public class TestClass
{
    /// <summary>Returns the sample document without any namespace.</summary>
    static string GetXml()
    {
        return @"<?xml version=""1.0"" encoding=""utf-8""?>
<parent>
<element href=""www.something.com"" title=""First"">
<element href=""www.something.com"" title=""Second"">
<element href=""www.something.com"" title=""Third"">
</element>
</element>
</element>
<element href=""www.something.com"" title=""Some title""></element>
<element href=""www.something.com"" title=""Another"">
<element href=""www.something.com"" title=""Extra"">
<element href=""www.something.com"" title=""Page"">
<element href=""www.something.com"" title=""Target"">
</element>
</element>
</element>
</element>
</parent>";
    }

    /// <summary>Returns the same sample document with a default namespace on the root.</summary>
    static string GetXmlWithNamespace()
    {
        return @"<?xml version=""1.0"" encoding=""utf-8""?>
<parent
xmlns=""urn:schemas-microsoft-com:office:spreadsheet""
xmlns:o=""urn:schemas-microsoft-com:office:office"">
<element href=""www.something.com"" title=""First"">
<element href=""www.something.com"" title=""Second"">
<element href=""www.something.com"" title=""Third"">
</element>
</element>
</element>
<element href=""www.something.com"" title=""Some title""></element>
<element href=""www.something.com"" title=""Another"">
<element href=""www.something.com"" title=""Extra"">
<element href=""www.something.com"" title=""Page"">
<element href=""www.something.com"" title=""Target"">
</element>
</element>
</element>
</element>
</parent>";
    }

    /// <summary>Runs the check against both sample documents.</summary>
    public static void Test()
    {
        Test(GetXml());
        Test(GetXmlWithNamespace());
    }

    /// <summary>
    /// Generates a path for the root and every descendant of the parsed
    /// document, writes the paths as indented JSON to the debug output, and
    /// asserts that each path selects the element it was generated from.
    /// </summary>
    /// <param name="xml">The XML text to parse.</param>
    public static void Test(string xml)
    {
        var document = XDocument.Parse(xml);
        var entries = document.Root
            .DescendantsAndSelf()
            .Select(e => new { Element = e, Path = e.GetAbsoluteXPathByAttribute("title") })
            .ToList();

        var json = JsonConvert.SerializeObject(entries.Select(entry => entry.Path), Formatting.Indented);
        Debug.WriteLine(json);

        foreach (var entry in entries)
        {
            var selected = document.XPathSelectElement(entry.Path);
            Debug.Assert(selected == entry.Element); // No asserts
        }
    }
}
这会生成以下路径,这些路径正确评估到相关元素:
"parent", "parent/*[@title=\"First\"][1]", "parent/*[@title=\"First\"][1]/*[@title=\"Second\"][1]", "parent/*[@title=\"First\"][1]/*[@title=\"Second\"][1]/*[@title=\"Third\"][1]", "parent/*[@title=\"Some title\"][1]", "parent/*[@title=\"Another\"][1]", "parent/*[@title=\"Another\"][1]/*[@title=\"Extra\"][1]", "parent/*[@title=\"Another\"][1]/*[@title=\"Extra\"][1]/*[@title=\"Page\"][1]", "parent/*[@title=\"Another\"][1]/*[@title=\"Extra\"][1]/*[@title=\"Page\"][1]/*[@title=\"Target\"][1]"