我在Sql中有一个带有Xml列的表。所有Xml文件都有相同的模式,我想将这些Xml中的一些合并在一起。
例如对于X1:
<A>
<B>
<C id='101'>
<D id='102'>abcd</D>
</C>
<C id='103'>
<D id='104'>zxcv</D>
</C>
</B>
</A>
和X2:
<A>
<B>
<C id='101'>
<D id='102'>abcd</D>
<D id='501'>abef</D>
</C>
<C id='502'>
<D id='503'>efgh</D>
</C>
</B>
</A>
X1 + X2 = ...
<A>
<B>
<C id='101'>
<D id='102'>abcd</D>
<D id='501'>abef</D>
</C>
<C id='103'>
<D id='104'>zxcv</D>
</C>
<C id='502'>
<D id='503'>efgh</D>
</C>
</B>
</A>
那么,用哪种方式来实现这种合并最好?具体应该怎么做?
答案 0 :(得分:2)
我认为解决这个问题的最佳方法是编写一个类,使用访问者模式(visitor pattern)来合并两个XDocument;
与普通访问者模式的区别在于,我们在访问第一个文档中的节点时,总是同时访问第二个文档中与之对应的并行节点。
总体设计如下:
// Design sketch only: member signatures without bodies, showing the shape of the merger.
// Each Merge* method takes the node from the first document and its counterpart
// from the second document and returns the merged node.
class XmlMerger
{
// Public entry point: merges two documents into a new document.
public XDocument Merge(XDocument first, XDocument second);
// Merges a pair of corresponding elements.
private XElement MergeElements(XElement first, XElement second);
// Merges a pair of corresponding attributes.
private XAttribute MergeAttributes(XAttribute first, XAttribute second);
// Merges a pair of corresponding text nodes.
private XText MergeTexts(XText first, XText second);
}
具体实现可能如下所示:
/// <summary>
/// Merges two XML documents that share the same schema by walking both trees in
/// parallel. Child elements are paired by element name plus the value of their
/// <c>id</c> attribute; elements present in only one document are carried over.
/// </summary>
/// <remarks>
/// Anything that cannot be merged unambiguously (different names or ids at the same
/// position, conflicting attribute values, conflicting text, mixed content, comments
/// or processing instructions inside an element that exists in both documents, or
/// duplicate name/id pairs among siblings) raises <see cref="InvalidOperationException"/>.
/// </remarks>
class XmlMerger
{
    /// <summary>Merges the root elements of two documents into a new document.</summary>
    /// <param name="first">The first document; its ordering of children takes precedence.</param>
    /// <param name="second">The second document.</param>
    /// <returns>A new document holding the merged tree.</returns>
    /// <exception cref="ArgumentNullException">Either document is null.</exception>
    /// <exception cref="InvalidOperationException">The documents contain content that cannot be merged.</exception>
    public XDocument Merge(XDocument first, XDocument second)
    {
        if (first == null)
        {
            throw new ArgumentNullException("first");
        }
        if (second == null)
        {
            throw new ArgumentNullException("second");
        }

        return new XDocument(MergeElements(first.Root, second.Root));
    }

    /// <summary>
    /// Merges two corresponding elements. When only one side exists, that side is
    /// returned as-is (LINQ to XML clones it when it is added to the result tree).
    /// </summary>
    private XElement MergeElements(XElement first, XElement second)
    {
        if (first == null)
        {
            return second;
        }
        if (second == null)
        {
            return first;
        }
        if (first.Name != second.Name)
        {
            throw new InvalidOperationException(
                "Cannot merge element " + Describe(first) + " with element " + Describe(second) + ": names differ.");
        }

        var firstId = (string)first.Attribute("id");
        var secondId = (string)second.Attribute("id");
        if (firstId != secondId)
        {
            throw new InvalidOperationException(
                "Cannot merge element " + Describe(first) + " with element " + Describe(second) + ": ids differ.");
        }

        var result = new XElement(first.Name);

        // Union of attribute names, first document's order first.
        var attributeNames = first.Attributes()
            .Concat(second.Attributes())
            .Select(a => a.Name)
            .Distinct();
        foreach (var attributeName in attributeNames)
        {
            result.Add(
                MergeAttributes(
                    first.Attribute(attributeName),
                    second.Attribute(attributeName)));
        }

        if (first.Nodes().OfType<XText>().Any() ||
            second.Nodes().OfType<XText>().Any())
        {
            // Text-only elements: each side may hold at most one text node and nothing else.
            var firstText = first.Nodes().OfType<XText>().FirstOrDefault();
            var secondText = second.Nodes().OfType<XText>().FirstOrDefault();

            if (first.Nodes().Any(n => n != firstText) ||
                second.Nodes().Any(n => n != secondText))
            {
                throw new InvalidOperationException(
                    "Element " + Describe(first) + " has mixed content, which is not supported.");
            }

            result.Add(MergeTexts(firstText, secondText));
        }
        else
        {
            // Comments, processing instructions etc. cannot be paired up between the two
            // documents; fail loudly instead of silently dropping them from the result.
            if (first.Nodes().Concat(second.Nodes()).Any(n => !(n is XElement)))
            {
                throw new InvalidOperationException(
                    "Element " + Describe(first) + " contains non-element child nodes (e.g. comments), which are not supported.");
            }

            var elementNames = first.Elements()
                .Concat(second.Elements())
                .Select(e => e.Name)
                .Distinct();

            foreach (var elementName in elementNames)
            {
                // Index each side once per name; a lookup tolerates a null key (no id attribute).
                var firstById = first.Elements(elementName)
                    .ToLookup(e => (string)e.Attribute("id"));
                var secondById = second.Elements(elementName)
                    .ToLookup(e => (string)e.Attribute("id"));

                var ids = first.Elements(elementName)
                    .Concat(second.Elements(elementName))
                    .Select(e => (string)e.Attribute("id"))
                    .Distinct();

                foreach (var id in ids)
                {
                    // SingleOrDefault throws InvalidOperationException when siblings share name and id.
                    XElement firstElement = firstById[id].SingleOrDefault();
                    XElement secondElement = secondById[id].SingleOrDefault();
                    result.Add(MergeElements(firstElement, secondElement));
                }
            }
        }

        return result;
    }

    /// <summary>Merges two corresponding attributes; they must agree on name and value.</summary>
    private XAttribute MergeAttributes(XAttribute first, XAttribute second)
    {
        if (first == null)
        {
            return second;
        }
        if (second == null)
        {
            return first;
        }
        if (first.Name != second.Name)
        {
            throw new InvalidOperationException(
                "Cannot merge attribute '" + first.Name + "' with attribute '" + second.Name + "'.");
        }
        if (first.Value == second.Value)
        {
            return new XAttribute(first);
        }

        // can't merge attributes with different values
        throw new InvalidOperationException(
            "Attribute '" + first.Name + "' has conflicting values '" + first.Value + "' and '" + second.Value + "'.");
    }

    /// <summary>Merges two corresponding text nodes; they must carry the same value.</summary>
    private XText MergeTexts(XText first, XText second)
    {
        if (first == null)
        {
            return second;
        }
        if (second == null)
        {
            return first;
        }
        if (first.Value == second.Value)
        {
            return new XText(first);
        }

        // can't merge texts with different values
        throw new InvalidOperationException(
            "Conflicting text values '" + first.Value + "' and '" + second.Value + "'.");
    }

    /// <summary>Builds a short "&lt;Name id='...'&gt;" label for exception messages.</summary>
    private static string Describe(XElement element)
    {
        var id = (string)element.Attribute("id");
        return id == null
            ? "<" + element.Name + ">"
            : "<" + element.Name + " id='" + id + "'>";
    }
}
如果此代码遇到无法处理的内容(例如,具有相同id但文本不同的节点;或注释),则会引发异常。
答案 1 :(得分:1)
我会在XQuery中做到这一点。它的代码要少得多。以下示例使用纯XQuery 1.0完成。使用XQuery 3.0(因为它支持group by)或使用XQuery Scripting会更容易。
(: Sample input: the two documents X1 and X2 that are to be merged. :)
declare variable $sequence := (
  (: X1 :)
  <A>
    <B>
      <C id="101">
        <D id="102">abcd</D>
      </C>
      <C id="103">
        <D id="104">zxcv</D>
      </C>
    </B>
  </A>,
  (: X2 :)
  <A>
    <B>
      <C id="101">
        <D id="102">abcd</D>
        <D id="501">abef</D>
      </C>
      <C id="502">
        <D id="503">efgh</D>
      </C>
    </B>
  </A>
);
(:~
 : Flattens a sequence of D elements into de-duplicated copies.
 :
 : Each returned D carries its own @id plus a @cid attribute holding the @id of
 : the C element it came from, so that the caller can regroup the result by C.
 : Two D elements are duplicates when both @cid and @id match.
 :
 : @param $dsequence the D elements to merge (may be empty)
 : @return the annotated D elements without duplicates; because the recursion
 :         appends the head after the merged tail, the result is in reverse
 :         input order
 :)
declare function local:merge($dsequence) {
  if (fn:empty($dsequence)) then
    (: guard: without it an empty input would recurse forever :)
    ()
  else
    let $dfirst := $dsequence[1]
    let $dextended := <D cid="{$dfirst/../@id}" id="{$dfirst/@id}">{$dfirst/text()}</D>
    return
      if (count($dsequence) eq 1) then
        (: nothing to merge :)
        $dextended
      else
        (: merging :)
        let $tomerge := local:merge(fn:subsequence($dsequence, 2))
        return
          (: both keys must match on the SAME element, and both are attributes :)
          if ($tomerge[@cid eq $dextended/@cid and @id eq $dextended/@id]) then
            $tomerge
          else
            ($tomerge, $dextended)
};
(: Main query: rebuild a single A/B document, grouping the flattened D elements
   back under one C element per distinct parent id. :)
<A><B> {
  let $flat := local:merge($sequence/B/C/D)
  for $parentId in fn:distinct-values(fn:data($flat/@cid))
  (: every flattened D that belongs to the current C :)
  let $members := $flat[@cid eq $parentId]
  return
    <C id="{$parentId}"> {
      for $childId in fn:distinct-values(fn:data($members/@id))
      (: keep only the first D for each id within this C :)
      let $chosen := $members[@id eq $childId][1]
      return <D id="{$chosen/@id}">{$chosen/text()}</D>
    }</C>
}
</B></A>