我想用外部 XML 文件替换一个大型(约 300 MB)XML 文件中的多个表。
主文件中大约有 30,000 个表,而外部 XML 文件只有 23,000 个,因为有些表保持不变。
例如,假设主文件为:
<?xml version="1.0" encoding="UTF-8"?>
<INI>
<TABLE name="People">
<ROW>
<ID>1</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
</TABLE>
<TABLE name="Animals">
<ROW>
<ID>1</ID>
<Name><![CDATA[Golden]]></Name>
</ROW>
</TABLE>
</INI>
我有名为 People.xml 和 Animals.xml 的外部文件,主文件中同名的表应该被它们的内容替换。
如果 People.xml 的内容是:
<TABLE name="People">
<ROW>
<ID>1</ID>
<Name><![CDATA[Mary]]></Name>
</ROW>
<ROW>
<ID>2</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
<ROW>
<ID>3</ID>
<Name><![CDATA[Dan]]></Name>
</ROW>
</TABLE>
那么大型主 XML 文件将变为:
<?xml version="1.0" encoding="UTF-8"?>
<INI>
<TABLE name="People">
<ROW>
<ID>1</ID>
<Name><![CDATA[Mary]]></Name>
</ROW>
<ROW>
<ID>2</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
<ROW>
<ID>3</ID>
<Name><![CDATA[Dan]]></Name>
</ROW>
</TABLE>
<TABLE name="Animals">
<ROW>
<ID>1</ID>
<Name><![CDATA[Golden]]></Name>
</ROW>
</TABLE>
</INI>
然后 Animals.xml 也按同样的方式处理。
我已经研究过 String.Split(),但没有找到用它实现这一点的方法。
如有任何帮助,不胜感激。提前谢谢!
答案 0 :(得分:1)
您可以借用 Mark Fussell 的文章《Combining the XmlReader and XmlWriter classes for simple streaming transformations》中把 XmlReader 流式传输到 XmlWriter 的基本逻辑,将一个 XML 文件的内容修补到另一个 XML 文件中:
/// <summary>
/// Base class for a streaming XML editor. Nodes are read one at a time from an
/// <see cref="XmlReader"/> and copied to an <see cref="XmlWriter"/>; elements accepted by
/// the supplied predicate are handed to <see cref="EditCurrentElement"/> instead of being copied.
/// </summary>
public abstract class XmlStreamingEditorBase
{
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldTransform;

    /// <summary>
    /// Stores the reader, writer and selection predicate used by <see cref="Process"/>.
    /// </summary>
    /// <param name="reader">Source of the XML nodes to stream.</param>
    /// <param name="writer">Destination the nodes are written to.</param>
    /// <param name="shouldTransform">
    /// Called with the reader positioned on each element; returning true routes that element
    /// to <see cref="EditCurrentElement"/>.
    /// </param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public XmlStreamingEditorBase(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform)
    {
        // Fail fast with the offending parameter name instead of a NullReferenceException
        // in the middle of Process().
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (shouldTransform == null)
            throw new ArgumentNullException("shouldTransform");
        this.reader = reader;
        this.writer = writer;
        this.shouldTransform = shouldTransform;
    }

    /// <summary>The reader nodes are pulled from.</summary>
    protected XmlReader Reader { get { return reader; } }

    /// <summary>The writer nodes are pushed to.</summary>
    protected XmlWriter Writer { get { return writer; } }

    /// <summary>
    /// Reads <see cref="Reader"/> to its end. Each element accepted by the predicate is passed
    /// to <see cref="EditCurrentElement"/>; every other node is copied to <see cref="Writer"/>
    /// with WriteShallowNode.
    /// </summary>
    public void Process()
    {
        while (Reader.Read())
        {
            if (Reader.NodeType == XmlNodeType.Element && shouldTransform(Reader))
            {
                // The subclass is responsible for whatever is written for this element;
                // the loop then simply advances the reader again.
                EditCurrentElement();
                continue;
            }
            Writer.WriteShallowNode(Reader);
        }
    }

    /// <summary>
    /// Handles the element <see cref="Reader"/> is currently positioned on, in place of the
    /// default shallow copy.
    /// </summary>
    protected abstract void EditCurrentElement();
}
/// <summary>
/// Streaming editor whose per-element edit is supplied as a delegate.
/// </summary>
public class XmlStreamingEditor : XmlStreamingEditorBase
{
    readonly Action<XmlReader, XmlWriter> transform;

    /// <summary>
    /// Creates an editor that invokes <paramref name="transform"/> for every element accepted
    /// by <paramref name="shouldTransform"/>.
    /// </summary>
    /// <param name="reader">Source of the XML nodes to stream.</param>
    /// <param name="writer">Destination the nodes are written to.</param>
    /// <param name="shouldTransform">Selects the elements to hand to <paramref name="transform"/>.</param>
    /// <param name="transform">
    /// Receives a subtree reader over the selected element and the output writer.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
    public XmlStreamingEditor(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform, Action<XmlReader, XmlWriter> transform)
        : base(reader, writer, shouldTransform)
    {
        // Reject a null delegate up front rather than failing on the first matching element.
        if (transform == null)
            throw new ArgumentNullException("transform");
        this.transform = transform;
    }

    /// <summary>
    /// Opens a subtree reader over the current element, passes it together with the writer
    /// to the transform delegate, and disposes the subtree reader afterwards.
    /// </summary>
    protected override void EditCurrentElement()
    {
        using (var subReader = Reader.ReadSubtree())
        {
            transform(subReader, Writer);
        }
    }
}
/// <summary>
/// Streams a main XML document from a reader to a writer, replacing one element of it with
/// an element taken from a second ("patch") reader.
/// </summary>
public class XmlStreamingPatcher
{
    readonly XmlReader patchReader;
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldPatchFrom;
    readonly Func<XmlReader, XmlReader, bool> shouldPatchFromTo;
    bool patched = false;

    /// <summary>
    /// Creates a patcher.
    /// </summary>
    /// <param name="reader">Reader over the main document.</param>
    /// <param name="writer">Writer receiving the patched main document.</param>
    /// <param name="patchReader">Reader over the document that supplies the replacement element.</param>
    /// <param name="shouldPatchFrom">
    /// Called with <paramref name="patchReader"/> positioned on each element; the first element
    /// for which it returns true becomes the replacement.
    /// </param>
    /// <param name="shouldPatchFromTo">
    /// Called with (patch reader, main reader) for elements of the main document; returning true
    /// replaces that main element with the replacement element.
    /// </param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public XmlStreamingPatcher(XmlReader reader, XmlWriter writer, XmlReader patchReader, Predicate<XmlReader> shouldPatchFrom, Func<XmlReader, XmlReader, bool> shouldPatchFromTo)
    {
        // Name the offending parameter so callers can tell which argument was null.
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (patchReader == null)
            throw new ArgumentNullException("patchReader");
        if (shouldPatchFrom == null)
            throw new ArgumentNullException("shouldPatchFrom");
        if (shouldPatchFromTo == null)
            throw new ArgumentNullException("shouldPatchFromTo");
        this.reader = reader;
        this.writer = writer;
        this.patchReader = patchReader;
        this.shouldPatchFrom = shouldPatchFrom;
        this.shouldPatchFromTo = shouldPatchFromTo;
    }

    /// <summary>
    /// Advances the patch reader to the first element accepted by the shouldPatchFrom predicate,
    /// then streams the main document to the writer, substituting that element for the matching
    /// main element.
    /// </summary>
    /// <returns>
    /// True if patch content was written. False if the main document had no matching element,
    /// or if the patch reader contained no acceptable element; in the latter case nothing is
    /// read from the main reader or written to the writer.
    /// </returns>
    public bool Process()
    {
        patched = false;
        while (patchReader.Read())
        {
            if (patchReader.NodeType == XmlNodeType.Element && shouldPatchFrom(patchReader))
            {
                var editor = new XmlStreamingEditor(reader, writer, ShouldPatchTo, PatchNode);
                editor.Process();
                return patched;
            }
        }
        return false;
    }

    /// <summary>
    /// Decides whether the main-document element under <paramref name="reader"/> is the one to replace.
    /// </summary>
    bool ShouldPatchTo(XmlReader reader)
    {
        // The patch element can be consumed only once. After that the patch reader is no longer
        // positioned on an element, so a second match would make ReadSubtree() throw
        // InvalidOperationException; remaining elements are therefore copied unchanged.
        if (patched)
            return false;
        return shouldPatchFromTo(patchReader, reader);
    }

    /// <summary>
    /// Writes the patch element (the whole subtree under the patch reader) to <paramref name="writer"/>.
    /// The <paramref name="reader"/> over the element being replaced is deliberately not read.
    /// </summary>
    void PatchNode(XmlReader reader, XmlWriter writer)
    {
        using (var subReader = patchReader.ReadSubtree())
        {
            while (subReader.Read())
            {
                writer.WriteShallowNode(subReader);
                patched = true;
            }
        }
    }
}
/// <summary>
/// Extension helpers for <see cref="XmlReader"/>.
/// </summary>
public static class XmlReaderExtensions
{
    /// <summary>
    /// Returns the expanded name (local name plus namespace URI) of the element the reader is
    /// positioned on.
    /// </summary>
    /// <param name="reader">The reader to inspect; may be null.</param>
    /// <returns>
    /// The element's <see cref="XName"/>, or null when <paramref name="reader"/> is null or is
    /// not positioned on an element node.
    /// </returns>
    public static XName GetElementName(this XmlReader reader)
    {
        if (reader == null)
            return null;
        if (reader.NodeType != XmlNodeType.Element)
            return null;
        // Use LocalName, not Name: Name is the qualified name ("prefix:local"), and XName.Get
        // rejects a local name containing ':' with an XmlException.
        string localName = reader.LocalName;
        string uri = reader.NamespaceURI;
        return XName.Get(localName, uri);
    }
}
/// <summary>
/// Extension helpers for <see cref="XmlWriter"/>.
/// </summary>
public static class XmlWriterExtensions
{
    /// <summary>
    /// Copies the single node the reader is positioned on to the writer, without descending
    /// into children. Node types not handled here are reported via <see cref="Debug"/> and skipped.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="reader">Reader positioned on the node to copy.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="writer"/> is null.
    /// </exception>
    public static void WriteShallowNode(this XmlWriter writer, XmlReader reader)
    {
        // adapted from http://blogs.msdn.com/b/mfussell/archive/2005/02/12/371546.aspx
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");

        XmlNodeType kind = reader.NodeType;
        if (kind == XmlNodeType.Element)
        {
            WriteElementStart(writer, reader);
        }
        else if (kind == XmlNodeType.EndElement)
        {
            writer.WriteFullEndElement();
        }
        else if (kind == XmlNodeType.Text)
        {
            writer.WriteString(reader.Value);
        }
        else if (kind == XmlNodeType.CDATA)
        {
            writer.WriteCData(reader.Value);
        }
        else if (kind == XmlNodeType.Whitespace || kind == XmlNodeType.SignificantWhitespace)
        {
            writer.WriteWhitespace(reader.Value);
        }
        else if (kind == XmlNodeType.Comment)
        {
            writer.WriteComment(reader.Value);
        }
        else if (kind == XmlNodeType.XmlDeclaration || kind == XmlNodeType.ProcessingInstruction)
        {
            // The XML declaration is forwarded through the same call as a processing instruction.
            writer.WriteProcessingInstruction(reader.Name, reader.Value);
        }
        else if (kind == XmlNodeType.EntityReference)
        {
            writer.WriteEntityRef(reader.Name);
        }
        else if (kind == XmlNodeType.DocumentType)
        {
            writer.WriteDocType(reader.Name, reader.GetAttribute("PUBLIC"), reader.GetAttribute("SYSTEM"), reader.Value);
        }
        else
        {
            Debug.WriteLine("unknown NodeType " + kind);
        }
    }

    // Writes an element's start tag and attributes; an empty element is closed immediately
    // because the reader will not report a separate end-element node for it.
    static void WriteElementStart(XmlWriter writer, XmlReader reader)
    {
        writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
        writer.WriteAttributes(reader, true);
        if (reader.IsEmptyElement)
        {
            writer.WriteEndElement();
        }
    }
}
要创建用于从文件读取和向文件写入 XML 的 XmlReader 和 XmlWriter 实例,请使用 XmlReader.Create(string) 和 XmlWriter.Create(string)。
此外,请务必先将大文件流式写入临时文件,只有在编辑完成后才用它替换原始文件。
然后,进行测试:
/// <summary>
/// Demonstration harness: patches a small in-memory main document with a small in-memory
/// patch document and prints the result to the debug output.
/// </summary>
public static class TestXmlStreamingPatcher
{
    /// <summary>
    /// Runs the patcher over the sample documents and writes the patched XML via
    /// <see cref="Debug.WriteLine(string)"/>.
    /// </summary>
    public static void Test()
    {
        string patchXml = @"<TABLE name=""People"">
<ROW>
<ID>1</ID>
<Name><![CDATA[Mary]]></Name>
</ROW>
<ROW>
<ID>2</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
<ROW>
<ID>3</ID>
<Name><![CDATA[Dan]]></Name>
</ROW>
</TABLE>
";
        string mainXml = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<INI>
<TABLE name=""People"">
<ROW>
<ID>1</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
</TABLE>
<TABLE name=""Animals"">
<ROW>
<ID>1</ID>
<Name><![CDATA[Golden]]></Name>
</ROW>
</TABLE>
</INI>
";
        Debug.WriteLine(TestPatch(mainXml, patchXml));
    }

    // Patches mainXml with patchXml entirely in memory and returns the resulting document text.
    private static string TestPatch(string mainXml, string patchXml)
    {
        string result;
        using (var mainText = new StringReader(mainXml))
        using (var mainXmlReader = XmlReader.Create(mainText))
        using (var patchText = new StringReader(patchXml))
        using (var patchXmlReader = XmlReader.Create(patchText))
        using (var output = new StringWriter())
        {
            // The XML writer must be disposed (flushed) before the text is read back.
            using (var xmlOutput = XmlWriter.Create(output))
            {
                new XmlStreamingPatcher(mainXmlReader, xmlOutput, patchXmlReader, ShouldPatchFrom, ShouldPatchFromTo).Process();
            }
            result = output.ToString();
        }
        return result;
    }

    // Accepts the patch document's TABLE elements (no namespace) as the replacement source.
    private static bool ShouldPatchFrom(XmlReader reader)
    {
        XName tableName = "TABLE";
        return reader.GetElementName() == tableName;
    }

    // A main element is replaced when it has the same element name as the patch element and
    // both carry the same non-empty "name" attribute.
    private static bool ShouldPatchFromTo(XmlReader patchReader, XmlReader toReader)
    {
        bool sameElement = patchReader.GetElementName() == toReader.GetElementName();
        if (!sameElement)
        {
            return false;
        }
        string patchTableName = patchReader.GetAttribute("name");
        return !string.IsNullOrEmpty(patchTableName)
            && patchTableName == toReader.GetAttribute("name");
    }
}
TestXmlStreamingPatcher.Test() 的输出为:
<?xml version="1.0" encoding="UTF-8"?>
<INI>
<TABLE name="People">
<ROW>
<ID>1</ID>
<Name><![CDATA[Mary]]></Name>
</ROW>
<ROW>
<ID>2</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
<ROW>
<ID>3</ID>
<Name><![CDATA[Dan]]></Name>
</ROW>
</TABLE>
<TABLE name="Animals">
<ROW>
<ID>1</ID>
<Name><![CDATA[Golden]]></Name>
</ROW>
</TABLE>
</INI>
这就是你想要的。