我注意到XmlReader并不总是以标准的4096字符块来读取XML输入。具体来说,如果XmlReader发现第一个缓冲区以一个不完整的XML元素名称结尾,它会先把第一个缓冲区中的这部分数据复制到第二个缓冲区,然后才向该缓冲区读入更多数据。
为什么XmlReader会这样?
为什么XmlReader不总是请求读取4096个字符来填满标准大小的缓冲区?
重现奇怪的XmlReader缓冲行为的代码:
/// <summary>
/// Reproduction harness that traces how <see cref="XmlReader"/> requests characters
/// from its underlying <see cref="TextReader"/> while parsing a very long element name.
/// </summary>
[TestClass]
public class XmlReaderTests
{
    /// <summary>
    /// A <see cref="StreamReader"/> that traces the arguments and the result of every
    /// <see cref="Read(char[], int, int)"/> call made against it.
    /// </summary>
    public class DebugStreamReader : StreamReader
    {
        /// <summary>Creates a tracing reader over <paramref name="stream"/>.</summary>
        /// <param name="stream">The stream to read characters from.</param>
        public DebugStreamReader(Stream stream) : base(stream) { }

        /// <summary>
        /// Delegates to <see cref="StreamReader.Read(char[], int, int)"/> and writes one
        /// trace line describing the request and how many characters were returned.
        /// </summary>
        /// <param name="buffer">Destination buffer supplied by the caller.</param>
        /// <param name="index">Offset in <paramref name="buffer"/> at which writing starts.</param>
        /// <param name="count">Maximum number of characters requested.</param>
        /// <returns>The number of characters the base reader actually returned.</returns>
        public override int Read(
            char[] buffer,
            int index,
            int count)
        {
            int readLength = base.Read(buffer, index, count);

            // Trace after the base call so the line carries both the request and its result.
            Trace.WriteLine("readLength: " + readLength + "; buffer.Length: " + buffer.Length + "; index: " + index + "; count: " + count);
            return readLength;
        }
    }

    /// <summary>
    /// Parses a document whose single element has a 20,000-character name through a
    /// <see cref="DebugStreamReader"/>, so that each read request issued by the
    /// <see cref="XmlReader"/> shows up in the trace output.
    /// </summary>
    [TestMethod]
    public void SuperLongXmlElementNameXmlReaderTest()
    {
        string elementName = new string('a', 20000);
        string xmlWithSuperLongElementName = "<" + elementName + ">test</" + elementName + ">";

        // The XmlReader is created with default settings, under which CloseInput is false,
        // so disposing it does not dispose the reader it was given. The stream and the
        // StreamReader are therefore owned, and released, here.
        using (MemoryStream inputStream = new MemoryStream(Encoding.UTF8.GetBytes(xmlWithSuperLongElementName)))
        using (StreamReader sr = new DebugStreamReader(inputStream))
        using (XmlReader reader = XmlReader.Create(sr))
        {
            while (reader.Read())
            {
                // The returned text is intentionally discarded; only the read pattern matters.
                reader.ReadString();
            }
        }
    }
}
输出:
readLength: 4096; buffer.Length: 4097; index: 0; count: 4096
readLength: 1; buffer.Length: 4097; index: 4095; count: 1
readLength: 4097; buffer.Length: 8194; index: 4096; count: 4097
readLength: 8194; buffer.Length: 16388; index: 8193; count: 8194
readLength: 16388; buffer.Length: 32776; index: 16387; count: 16388
readLength: 7233; buffer.Length: 32776; index: 12768; count: 20007
readLength: 0; buffer.Length: 32776; index: 0; count: 32775