XMLDiff无法正确识别差异?

时间:2017-04-19 14:55:07

标签: c# xml diff xmldiff

我在这里缺少什么? XMLDiff是否应关注元素名称并寻求最佳匹配以正确识别以下更改?

用于在两个XML文件之间进行比较的Helper类:

public class XMLDiffer
{
    public XDocument Diff(string originalXML, string changedXML)
    {
        //http://msdn2.microsoft.com/en-us/library/aa302294.aspx
        XmlDiff xmlDiff = new XmlDiff(XmlDiffOptions.IgnoreChildOrder | XmlDiffOptions.IgnoreComments | XmlDiffOptions.IgnoreWhitespace);

        xmlDiff.Algorithm = XmlDiffAlgorithm.Precise;
        StringBuilder diffgramStringBuilder = new StringBuilder();
        bool xmlComparisonResult = false;
        using (StringReader legacySr = new StringReader(originalXML), nextgenSr = new StringReader(changedXML))
        {
            using (XmlReader legacyReader = XmlReader.Create(legacySr), nextgenReader = XmlReader.Create(nextgenSr))
            {
                using (StringWriter sw = new StringWriter(diffgramStringBuilder))
                {
                    using (XmlWriter diffgramWriter = XmlWriter.Create(sw))
                    {
                        xmlComparisonResult = xmlDiff.Compare(legacyReader, nextgenReader, diffgramWriter);
                    }
                }
            }
        }
        XDocument xdoc = XDocument.Parse(diffgramStringBuilder.ToString());
        return xdoc;
    }

    public string GetChangeHtml(string originalXML, string changedXML)
    {
        XmlDiffView view = new XmlDiffView();
        var diffgram = Diff(originalXML, changedXML);
        string ret = "";
        using (StringReader legacySr = new StringReader(originalXML), diffGramSr = new StringReader(diffgram.ToString()))
        {
            using (XmlReader legacyReader = XmlReader.Create(legacySr), diffgramReader = XmlReader.Create(diffGramSr))
            {
                using (StringWriter sw = new StringWriter())
                {
                    view.Load(legacyReader, diffgramReader);
                    view.GetHtml(sw);
                    ret = sw.ToString();
                }
            }
        }
        return ret;
    }

}

进行以下测试:

[TestMethod]
public void XMLDiff_AreNotSame_GetChangeHtmlAll()
{
    //Arrange
    string source = "<root><child>some text</child><child>more text</child><child1>REMOVED</child1></root>";
    //Ordering of the generic child nodes is not changed,  but it might
    string target = "<root><child>some text CHANGE</child><child>more text</child><child>ADDITION</child></root>";

    XMLDiffer differ = new XMLDiffer();

    //Act
    var diffview = differ.GetChangeHtml(source, target);

    //Assert
    Assert.IsNotNull(diffview);
}

生成以下内容(添加了html和table元素): https://pste.eu/p/Fm7Z.html

有关库的更多信息:http://msdn2.microsoft.com/en-us/library/aa302294.aspx

参考的Nuget链接:https://www.nuget.org/packages/XMLDiffPatch/

1 个答案:

答案 0 :(得分:0)

我最终实现了以下类来进行更改:

public class XMLComparer : IEqualityComparer<XNode>
{
    public bool Equals(XNode e1, XNode e2)
    {
        if (!(e1 is XElement)) return true;
        if (!(e2 is XElement)) return false;
        var el1 = e1 as XElement;
        var el2 = e2 as XElement;
        return Tuple.Create(el1.Name, el1.Value).Equals(Tuple.Create(el2.Name, el2.Value));
    }

    public int GetHashCode(XNode n)
    {
        if (!(n is XElement)) return 0;
        var el = n as XElement;
        return Tuple.Create(el.Name, el.Value).GetHashCode();
    }

}

public class XMLDifference
{
    public bool IsNew { get; set; }
    public XElement Node { get; set; }
}

public class XMLDifferenceComparer
{
    public List<XMLDifference> GetDifferences(string original, string changed)
    {
        List<XMLDifference> ret = new List<XMLDifference>();
        var originalDoc = XDocument.Parse(original);
        var changedDoc = XDocument.Parse(changed);
        //Get differences that are present in new xml version
        var differences = changedDoc.Root.Descendants().Except(originalDoc.Root.Descendants(), new XMLComparer());
        ret.AddRange(GetList(differences, true));
        //Get differences that have changed since the old xml version
        var oldValues = originalDoc.Root.Descendants().Except(changedDoc.Root.Descendants(), new XMLComparer());
        ret.AddRange(GetList(oldValues, false));
        return ret;
    }

    private List<XMLDifference> GetList(IEnumerable<XNode> nodes, bool isNew)
    {
        List<XMLDifference> ret = new List<XMLDifference>();
        foreach (XNode d in nodes)
        {
            var diff = new XMLDifference();
            diff.IsNew = isNew;

            var el = d as XElement;
            diff.Node = el;
            ret.Add(diff);
        }
        return ret;
    }
}

这可以识别更改,但不是特定于元素,它不能映射确切地更改了哪个元素以及如何导致每个元素缺少唯一标识符。

此解决方案的主要思想来自:https://gist.github.com/krcourville/6933451