自动从外部文件替换表

时间:2015-03-06 02:30:30

标签: c# xml

我试图用外部XML文件替换大(~300 MB)XML文件中的多个表。

大约有30,000个表,并且有23,000个XML文件,因为有些表保持不变。

例如,如果我有:

<?xml version="1.0" encoding="UTF-8"?>
<INI>
   <TABLE name="People">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Bob]]></Name>
      </ROW>
   </TABLE>
   <TABLE name="Animals">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Golden]]></Name>
      </ROW>
   </TABLE>
</INI>

并且我有名为 People.xml 和 Animals.xml 的文件,那么主文件中对应的表应该被替换。

如果People.xml是:

   <TABLE name="People">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Mary]]></Name>
      </ROW>
      <ROW>
         <ID>2</ID>
         <Name><![CDATA[Bob]]></Name>
      </ROW>
      <ROW>
         <ID>3</ID>
         <Name><![CDATA[Dan]]></Name>
      </ROW>
   </TABLE>

然后主要的大型XML文件将成为:

<?xml version="1.0" encoding="UTF-8"?>
<INI>
   <TABLE name="People">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Mary]]></Name>
      </ROW>
      <ROW>
         <ID>2</ID>
         <Name><![CDATA[Bob]]></Name>
      </ROW>
      <ROW>
         <ID>3</ID>
         <Name><![CDATA[Dan]]></Name>
      </ROW>
   </TABLE>
   <TABLE name="Animals">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Golden]]></Name>
      </ROW>
   </TABLE>
</INI>

然后Animals.xml也一样。

我已经研究过 String.Split(),但找不到用它来实现这一点的方法。

感谢任何帮助。提前谢谢!

1 个答案:

答案 0 :(得分:1)

您可以借用 Mark Fussell 的文章《Combining the XmlReader and XmlWriter classes for simple streaming transformations》中将 XmlReader 流式传输到 XmlWriter 的基本逻辑,把一个 XML 文件的内容修补到另一个 XML 文件中:

/// <summary>
/// Base class for a streaming XML editor. Nodes are copied one at a time from an
/// <see cref="XmlReader"/> to an <see cref="XmlWriter"/>; elements selected by a predicate are
/// handed to <see cref="EditCurrentElement"/> instead of being copied.
/// </summary>
public abstract class XmlStreamingEditorBase
{
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldTransform;

    /// <summary>
    /// Creates an editor over the given reader/writer pair.
    /// </summary>
    /// <param name="reader">Source of the XML nodes to copy.</param>
    /// <param name="writer">Destination for the copied or edited nodes.</param>
    /// <param name="shouldTransform">Called for every element node; returning true routes the element to <see cref="EditCurrentElement"/>.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public XmlStreamingEditorBase(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform)
    {
        // Fail at construction time rather than with a NullReferenceException in the middle of Process().
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (shouldTransform == null)
            throw new ArgumentNullException("shouldTransform");
        this.reader = reader;
        this.writer = writer;
        this.shouldTransform = shouldTransform;
    }

    /// <summary>The reader nodes are pulled from.</summary>
    protected XmlReader Reader { get { return reader; } }

    /// <summary>The writer nodes are pushed to.</summary>
    protected XmlWriter Writer { get { return writer; } }

    /// <summary>
    /// Reads the source until <see cref="XmlReader.Read"/> returns false. Each node is copied with
    /// WriteShallowNode, except element nodes accepted by the predicate, which are passed to
    /// <see cref="EditCurrentElement"/>.
    /// </summary>
    public void Process()
    {
        while (Reader.Read())
        {
            if (Reader.NodeType == XmlNodeType.Element)
            {
                if (shouldTransform(Reader))
                {
                    EditCurrentElement();
                    // The node the reader is left on after editing is not copied;
                    // the loop advances straight to the next Read().
                    continue;
                }
            }
            Writer.WriteShallowNode(Reader);
        }
    }

    /// <summary>
    /// Handles the element <see cref="Reader"/> is currently positioned on. Called by
    /// <see cref="Process"/> in place of the default shallow copy.
    /// </summary>
    protected abstract void EditCurrentElement();
}

/// <summary>
/// Streaming editor whose edit step is supplied as a delegate: each selected element is exposed
/// to the delegate through a subtree reader together with the output writer.
/// </summary>
public class XmlStreamingEditor : XmlStreamingEditorBase
{
    readonly Action<XmlReader, XmlWriter> transform;

    /// <summary>
    /// Creates an editor that applies <paramref name="transform"/> to every element accepted by
    /// <paramref name="shouldTransform"/>.
    /// </summary>
    /// <param name="reader">Source of the XML nodes.</param>
    /// <param name="writer">Destination for the output.</param>
    /// <param name="shouldTransform">Selects the elements to hand to <paramref name="transform"/>.</param>
    /// <param name="transform">Receives a subtree reader over the selected element and the output writer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
    public XmlStreamingEditor(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform, Action<XmlReader, XmlWriter> transform)
        : base(reader, writer, shouldTransform)
    {
        // Reject a missing delegate up front instead of failing on the first matching element.
        if (transform == null)
            throw new ArgumentNullException("transform");
        this.transform = transform;
    }

    /// <summary>
    /// Passes the current element to the transform delegate through a reader obtained from
    /// <see cref="XmlReader.ReadSubtree"/>; the subtree reader is disposed when the delegate returns.
    /// </summary>
    protected override void EditCurrentElement()
    {
        using (var subReader = Reader.ReadSubtree())
        {
            transform(subReader, Writer);
        }
    }
}

/// <summary>
/// Streams a main XML document from a reader to a writer, replacing one element of it with an
/// element taken from a separate patch document.
/// </summary>
public class XmlStreamingPatcher
{
    readonly XmlReader patchReader;
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldPatchFrom;
    readonly Func<XmlReader, XmlReader, bool> shouldPatchFromTo;
    bool patched = false;

    /// <summary>
    /// Creates a patcher.
    /// </summary>
    /// <param name="reader">Reader over the main document.</param>
    /// <param name="writer">Writer receiving the patched main document.</param>
    /// <param name="patchReader">Reader over the patch document.</param>
    /// <param name="shouldPatchFrom">Selects the element of the patch document to use as the replacement.</param>
    /// <param name="shouldPatchFromTo">Given (patch reader, main reader), decides whether the main element should be replaced by the patch element.</param>
    /// <exception cref="ArgumentNullException">Any argument is null; the exception names the offending parameter.</exception>
    public XmlStreamingPatcher(XmlReader reader, XmlWriter writer, XmlReader patchReader, Predicate<XmlReader> shouldPatchFrom, Func<XmlReader, XmlReader, bool> shouldPatchFromTo)
    {
        // Checked one by one so the exception identifies which argument was missing.
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (patchReader == null)
            throw new ArgumentNullException("patchReader");
        if (shouldPatchFrom == null)
            throw new ArgumentNullException("shouldPatchFrom");
        if (shouldPatchFromTo == null)
            throw new ArgumentNullException("shouldPatchFromTo");
        this.reader = reader;
        this.writer = writer;
        this.patchReader = patchReader;
        this.shouldPatchFrom = shouldPatchFrom;
        this.shouldPatchFromTo = shouldPatchFromTo;
    }

    /// <summary>
    /// Advances the patch reader to the first element accepted by the patch predicate, then streams
    /// the main document to the writer, substituting the patch element for the first matching
    /// main element.
    /// </summary>
    /// <returns>
    /// True if a main element was replaced. False if no replacement happened; when the patch
    /// document contains no accepted element, the main document is not read and nothing is written.
    /// </returns>
    public bool Process()
    {
        patched = false;
        while (patchReader.Read())
        {
            if (patchReader.NodeType == XmlNodeType.Element && shouldPatchFrom(patchReader))
            {
                var editor = new XmlStreamingEditor(reader, writer, ShouldPatchTo, PatchNode);
                editor.Process();
                return patched;
            }
        }
        return false;
    }

    /// <summary>
    /// Predicate handed to the editor: asks the caller-supplied comparison whether the current
    /// main element should be replaced by the current patch element.
    /// </summary>
    bool ShouldPatchTo(XmlReader reader)
    {
        // The patch element can be streamed only once. After it has been written the patch reader
        // no longer sits at the start of that element, so a second match must not reach PatchNode
        // (ReadSubtree requires the reader to be positioned on an element).
        if (patched)
            return false;
        return shouldPatchFromTo(patchReader, reader);
    }

    /// <summary>
    /// Writes the current patch element, including all of its descendants, to the writer. The
    /// subtree reader over the main element is deliberately left unread, so the original element
    /// is dropped from the output.
    /// </summary>
    void PatchNode(XmlReader reader, XmlWriter writer)
    {
        using (var subReader = patchReader.ReadSubtree())
        {
            while (subReader.Read())
            {
                writer.WriteShallowNode(subReader);
                patched = true;
            }
        }
    }
}

/// <summary>
/// Helper extensions for <see cref="XmlReader"/>.
/// </summary>
public static class XmlReaderExtensions
{
    /// <summary>
    /// Returns the namespace-qualified name of the element the reader is positioned on.
    /// </summary>
    /// <param name="reader">The reader to inspect; may be null.</param>
    /// <returns>
    /// The element's <see cref="XName"/> (local name plus namespace URI), or null when
    /// <paramref name="reader"/> is null or is not positioned on an element node.
    /// </returns>
    public static XName GetElementName(this XmlReader reader)
    {
        if (reader == null)
            return null;
        if (reader.NodeType != XmlNodeType.Element)
            return null;
        // Use LocalName, not Name: Name is the qualified "prefix:local" form, and the colon makes
        // XName.Get reject it for any prefixed element. The namespace is carried by NamespaceURI.
        return XName.Get(reader.LocalName, reader.NamespaceURI);
    }
}

/// <summary>
/// Helper extensions for <see cref="XmlWriter"/>.
/// </summary>
public static class XmlWriterExtensions
{
    /// <summary>
    /// Copies the single node the reader is currently positioned on to the writer. Child nodes
    /// are not followed; an element's start tag and attributes are written here, and its end tag
    /// is written when the reader later reaches the matching EndElement node.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="reader">Reader positioned on the node to copy.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> or <paramref name="writer"/> is null.</exception>
    public static void WriteShallowNode(this XmlWriter writer, XmlReader reader)
    {
        // adapted from http://blogs.msdn.com/b/mfussell/archive/2005/02/12/371546.aspx
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }

        XmlNodeType nodeType = reader.NodeType;

        if (nodeType == XmlNodeType.Element)
        {
            writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
            writer.WriteAttributes(reader, true);
            // An empty element is closed immediately; other elements are closed by the
            // EndElement branch below when that node is reached.
            if (reader.IsEmptyElement)
            {
                writer.WriteEndElement();
            }
        }
        else if (nodeType == XmlNodeType.Text)
        {
            writer.WriteString(reader.Value);
        }
        else if (nodeType == XmlNodeType.Whitespace || nodeType == XmlNodeType.SignificantWhitespace)
        {
            writer.WriteWhitespace(reader.Value);
        }
        else if (nodeType == XmlNodeType.CDATA)
        {
            writer.WriteCData(reader.Value);
        }
        else if (nodeType == XmlNodeType.EntityReference)
        {
            writer.WriteEntityRef(reader.Name);
        }
        else if (nodeType == XmlNodeType.XmlDeclaration || nodeType == XmlNodeType.ProcessingInstruction)
        {
            // The XML declaration is emitted through the processing-instruction API as well.
            writer.WriteProcessingInstruction(reader.Name, reader.Value);
        }
        else if (nodeType == XmlNodeType.DocumentType)
        {
            string publicId = reader.GetAttribute("PUBLIC");
            string systemId = reader.GetAttribute("SYSTEM");
            writer.WriteDocType(reader.Name, publicId, systemId, reader.Value);
        }
        else if (nodeType == XmlNodeType.Comment)
        {
            writer.WriteComment(reader.Value);
        }
        else if (nodeType == XmlNodeType.EndElement)
        {
            writer.WriteFullEndElement();
        }
        else
        {
            // Any other node type is not copied; it is only reported to the debug output.
            Debug.WriteLine("unknown NodeType " + reader.NodeType);
        }
    }
}

要创建用于从文件读取和向文件写入 XML 的 XmlReader 和 XmlWriter 实例,请使用 XmlReader.Create(string) 和 XmlWriter.Create(string)。此外,请务必将大文件流式输出到临时文件中,并且仅在编辑完成之后才替换原始文件。

然后,测试:

/// <summary>
/// Small demonstration of XmlStreamingPatcher: replaces the "People" table of an in-memory
/// document with the table from an in-memory patch and prints the result to the debug output.
/// </summary>
public static class TestXmlStreamingPatcher
{
    /// <summary>
    /// Runs the patch over the sample documents and writes the patched XML to the debug output.
    /// </summary>
    public static void Test()
    {
        const string mainXml = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<INI>
   <TABLE name=""People"">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Bob]]></Name>
      </ROW>
   </TABLE>
   <TABLE name=""Animals"">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Golden]]></Name>
      </ROW>
   </TABLE>
</INI>
";
        const string patchXml = @"<TABLE name=""People"">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Mary]]></Name>
      </ROW>
      <ROW>
         <ID>2</ID>
         <Name><![CDATA[Bob]]></Name>
      </ROW>
      <ROW>
         <ID>3</ID>
         <Name><![CDATA[Dan]]></Name>
      </ROW>
   </TABLE>
";
        // Kept as a separate statement so the patch runs regardless of whether the
        // Debug.WriteLine call below is compiled in.
        string result = TestPatch(mainXml, patchXml);
        Debug.WriteLine(result);
    }

    /// <summary>
    /// Patches <paramref name="mainXml"/> with <paramref name="patchXml"/> entirely in memory.
    /// </summary>
    /// <returns>The text produced by the XML writer.</returns>
    private static string TestPatch(string mainXml, string patchXml)
    {
        using (var output = new StringWriter())
        {
            using (var mainText = new StringReader(mainXml))
            using (var mainSource = XmlReader.Create(mainText))
            using (var patchText = new StringReader(patchXml))
            using (var patchSource = XmlReader.Create(patchText))
            using (var xmlOutput = XmlWriter.Create(output))
            {
                new XmlStreamingPatcher(mainSource, xmlOutput, patchSource, ShouldPatchFrom, ShouldPatchFromTo).Process();
            }
            // The XML writer has been disposed by now, so everything it buffered is in 'output'.
            return output.ToString();
        }
    }

    /// <summary>Accepts TABLE elements of the patch document.</summary>
    static bool ShouldPatchFrom(XmlReader reader)
    {
        var elementName = reader.GetElementName();
        return elementName == "TABLE";
    }

    /// <summary>
    /// Accepts a main element when it has the same element name as the patch element and both
    /// carry the same non-empty "name" attribute.
    /// </summary>
    static bool ShouldPatchFromTo(XmlReader patchReader, XmlReader toReader)
    {
        var patchElement = patchReader.GetElementName();
        var targetElement = toReader.GetElementName();
        if (patchElement != targetElement)
        {
            return false;
        }

        string tableName = patchReader.GetAttribute("name");
        return !string.IsNullOrEmpty(tableName) && tableName == toReader.GetAttribute("name");
    }
}

此类TestXmlStreamingPatcher.Test()的输出为

<?xml version="1.0" encoding="UTF-8"?>
<INI>
   <TABLE name="People">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Mary]]></Name>
      </ROW>
      <ROW>
         <ID>2</ID>
         <Name><![CDATA[Bob]]></Name>
      </ROW>
      <ROW>
         <ID>3</ID>
         <Name><![CDATA[Dan]]></Name>
      </ROW>
   </TABLE>
   <TABLE name="Animals">
      <ROW>
         <ID>1</ID>
         <Name><![CDATA[Golden]]></Name>
      </ROW>
   </TABLE>
</INI>

这就是你想要的。