我有一个不断被追加内容的XML文件。我需要反复读取其中的数据,但每次读取时,我都不想重新检索上一次运行中已经处理过的数据。
由于我知道文件在上次处理时的长度,我想可以用文件的长度(减去结尾的</Contacts>标签)来确定上次停止的位置。在此前提下,从文件中的特定字节位置开始检索所有Contact元素的最佳方法是什么?
<?xml version="1.0"?>
<Contacts>
<Contact>
<Name>Todd</Name>
<Email>todd@blah.com</Email>
</Contact>
<Contact>
<Name>Sarah</Name>
<Email>sarah@blah.com</Email>
</Contact>
</Contacts>
下面的代码块会读取所有联系人。我想对它加以限制,使它只读取第一个联系人之后(位于字节116处)的数据。
// Load the whole document from disk, then cast every <Contact> element
// to string and materialize the results as an array.
var xdoc = XDocument.Load(PATH_TO_FILE);
var contactElements = xdoc.Descendants("Contact");
var contact = contactElements
    .Select(element => (string)element)
    .ToArray();
答案 0 :(得分:2)
如果您仍想从特定的偏移量开始读取,同时又保持在较高的抽象层次上,可以使用下面的XmlTailReader:它把只剩下结束标记的文档尾部内容放进另一个根元素中:
/// <summary>
/// XmlReader wrapper that surrounds the nodes of an inner reader with a synthetic
/// root element. The start of that root is served from an in-memory "fake" reader,
/// then the inner reader's nodes are passed through, and finally the fake reader
/// is used again to finish the root.
/// </summary>
/// <remarks>
/// Only Read, Value and NodeType are shown; per the trailing comment, the remaining
/// XmlReader members are meant to forward to <see cref="Proxy"/> in the same way.
/// </remarks>
class XmlTailReader : XmlReader
{
// Reader over the real content supplied by the caller.
private readonly XmlReader _reader;
// Reader over the synthetic root element built in the constructor.
private readonly XmlReader _fakeReader;
// Incremented for every Element node reported (fake root included) and
// decremented for every EndElement reported by the real reader.
private int _level;
// State of the wrapper:
//   Start - nodes come from the fake reader until its root Element is reported;
//   Align - the next Read advances the real reader to its first Element;
//   None  - nodes come from the real reader;
//   End   - nodes come from the fake reader again (remainder of the synthetic root).
enum Fake { Start, Align, None, End };
private Fake _fake;
/// <summary>
/// Wraps <paramref name="reader"/> and prepares the synthetic root element.
/// </summary>
/// <param name="reader">Inner reader whose nodes become the root's content.</param>
/// <param name="rootTag">Name of the synthetic root element.</param>
public XmlTailReader(XmlReader reader, string rootTag = "root")
{
_reader = reader;
_fake = Fake.Start;
var doc = new XmlDocument();
var root = doc.CreateElement(rootTag);
doc.AppendChild(root);
// Give the root a child node so that the fake reader reports separate
// Element and EndElement nodes for it.
root.AppendChild(doc.CreateComment("dummy"));
_fakeReader = new XmlNodeReader(root);
}
/// <summary>
/// The reader that currently supplies node data: the fake reader in the
/// Start, Align and End states, the real reader otherwise.
/// </summary>
private XmlReader Proxy
{
get
{
switch(_fake)
{
case Fake.Start:
case Fake.Align:
case Fake.End:
return _fakeReader;
default:
return _reader;
}
}
}
/// <summary>
/// Advances to the next node according to the current state.
/// </summary>
/// <returns>false when the reader that was advanced has no more nodes; otherwise true.</returns>
public override bool Read()
{
switch(_fake)
{
case Fake.Start:
if (!_fakeReader.Read()) return false;
// NodeType goes through Proxy, which is still the fake reader here.
if (NodeType == XmlNodeType.Element)
{
++_level;
_fake = Fake.Align;
}
return true;
case Fake.Align:
// Switch state first: from here on NodeType reflects the real reader.
_fake = Fake.None;
while(true) // align to first Element
{
if (!_reader.Read()) return false;
if (NodeType == XmlNodeType.Element)
{
++_level;
break;
}
}
return true;
case Fake.None:
try
{
if (!_reader.Read()) return false;
}
catch (XmlException e)
{
// The message filter below is disabled, so ANY XmlException raised by the
// real reader is treated as the end of the real content.
// if (!e.Message.StartsWith("Unexpected end tag.")) throw;
// reading of extra-closing tag cause "Unexpected end tag"
// so use this as event for transition too
_fake = Fake.End;
if (!_fakeReader.Read()) return false;
return true;
}
switch(NodeType)
{
case XmlNodeType.Element:
++_level;
break;
case XmlNodeType.EndElement:
// Counter back at zero: hand over to the fake reader and advance it once.
if (--_level == 0)
{
_fake = Fake.End;
if (!_fakeReader.Read()) return false;
}
break;
}
return true;
default:
// Fake.End: keep draining the fake reader.
return Proxy.Read();
}
}
/// <summary>Value of the current node, taken from the active reader.</summary>
public override string Value
{
get { return Proxy.Value; }
}
/// <summary>Type of the current node, taken from the active reader.</summary>
public override XmlNodeType NodeType
{
get { return Proxy.NodeType; }
}
// rest use Proxy property for forwarding
}
// Demo: cut the sample document right after the first </Contact> end tag and
// load the remaining tail through XmlTailReader under a synthetic "xxx" root.
void Main()
{
    var xml = "<?xml version=\"1.0\"?>" + @"
<Contacts>
<Contact>
<Name>Todd</Name>
<Email>todd@blah.com</Email>
</Contact>
<Contact>
<Name>Sarah</Name>
<Email>sarah@blah.com</Email>
</Contact>
<Contact>
<Name>Peter</Name>
<Email>peter@blah.com</Email>
</Contact>
</Contacts>";
    const string tag = "</Contact>";

    // Ordinal search: the tag is markup, not linguistic text.
    var firstEnd = xml.IndexOf(tag, System.StringComparison.Ordinal);
    if (firstEnd < 0)
    {
        // Without this guard a missing tag would silently produce a wrong offset.
        throw new System.InvalidOperationException("End tag " + tag + " was not found in the document.");
    }

    var xml2 = xml.Substring(firstEnd + tag.Length);

    using (var sr = new StringReader(xml2))
    // The tail is not a well-formed document, so it is read as a fragment.
    using (var xr = XmlReader.Create(sr, new XmlReaderSettings { ConformanceLevel = ConformanceLevel.Fragment }))
    using (var xr2 = new XmlTailReader(xr, "xxx"))
    {
        var xdoc = XDocument.Load(xr2);
        xdoc.Descendants("Contact").Dump();
    }
}
请注意,要进行这种读取,ConformanceLevel必须设置为Fragment。
答案 1 :(得分:1)
我找到了一种通过索引位置保存/检索的方法。这也可以。
// Number of leading <Contact> elements that were already processed.
int position = 1;
// Skip yields every element when the count is zero or negative, which matches
// filtering on "index >= position".
var contacts = xdoc
    .Descendants("Contact")
    .Skip(position);
答案 2 :(得分:1)
您可以创建一个特殊的Stream,在自定义位置模拟文档(Document)的起始元素。代码非常粗糙,但可以工作:
// Demo: start reading the sample bytes at the second <Contact> element and let
// CustomReader supply the missing "<Contacts>" start tag in front of them.
void Main()
{
    var xml =
@"<Contacts><Contact><Name>Todd</Name><Email>todd@blah.com</Email></Contact><Contact>
<Name>Sarah1</Name>
<Email>sarah@blah.com</Email>
</Contact>
<Contact>
<Name>Sarah2</Name>
<Email>sarah@blah.com</Email>
</Contact>
</Contacts>";

    using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
    {
        // Byte offset just past the first </Contact>, i.e. the start of the second <Contact>.
        // Set it before any reader is created over the stream.
        ms.Position = 74;

        using (var prefixed = new CustomReader("<Contacts>", ms))
        using (var reader = XmlReader.Create(prefixed))
        {
            var xdoc = XDocument.Load(reader);
            var contact = xdoc.Descendants("Contact").ToArray();
            contact.Dump();
        }
    }
}
/// <summary>
/// Read-only, forward-only stream that first yields the UTF-8 bytes of a text
/// prefix and then the bytes of an underlying stream, starting at that stream's
/// current position.
/// </summary>
public class CustomReader : Stream
{
    // UTF-8 bytes that are emitted before any byte of the underlying stream.
    private readonly byte[] _prefix;
    private readonly Stream _stream;
    // Number of prefix bytes already handed out.
    private int _prefixPosition;

    /// <summary>Creates the stream.</summary>
    /// <param name="element">Text to emit first (for example a start tag).</param>
    /// <param name="stream">Stream whose remaining bytes follow the prefix.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public CustomReader(string element, Stream stream)
    {
        if (element == null) throw new ArgumentNullException("element");
        if (stream == null) throw new ArgumentNullException("stream");

        _prefix = Encoding.UTF8.GetBytes(element);
        _stream = stream;
        _prefixPosition = 0;
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    /// <summary>Closes the underlying stream together with this one.</summary>
    public override void Close()
    {
        _stream.Close();
        base.Close();
    }

    public override void Flush()
    {
        throw new NotImplementedException();
    }

    public override long Length
    {
        get { throw new NotImplementedException(); }
    }

    public override long Position
    {
        get { throw new NotImplementedException(); }
        set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes: prefix bytes while any remain,
    /// afterwards bytes of the underlying stream.
    /// </summary>
    /// <returns>Number of bytes written to <paramref name="buffer"/>; never more than <paramref name="count"/>.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (count == 0) return 0;

        int prefixRemaining = _prefix.Length - _prefixPosition;
        if (prefixRemaining > 0)
        {
            // Hand out only as much of the prefix as the caller asked for; the
            // rest is delivered by subsequent calls.
            int toCopy = Math.Min(prefixRemaining, count);
            Buffer.BlockCopy(_prefix, _prefixPosition, buffer, offset, toCopy);
            _prefixPosition += toCopy;
            return toCopy;
        }

        return _stream.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotImplementedException();
    }

    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotImplementedException();
    }
}
答案 3 :(得分:1)
如果您不想破坏XML读取的一致性(conformance),那么仍然需要从头读取文件,但可以在构建XDocument时跳过开头的若干元素:
/// <summary>
/// XmlReader wrapper that hides the first <c>skip</c> child elements of the root
/// element (including their whole subtrees) and passes every other node of the
/// inner reader through unchanged.
/// </summary>
/// <remarks>
/// Only Read is shown; the remaining XmlReader members simply forward to the
/// inner reader (see the trailing comment).
/// </remarks>
class XmlSkipReader : XmlReader
{
    private readonly XmlReader _reader;
    // Number of root-level child elements to hide.
    private readonly int _skip;
    // _level becomes 1 once the root element has been reported;
    // _skipped counts the child elements hidden so far.
    private int _level, _skipped;

    /// <param name="reader">Inner reader positioned before the document content.</param>
    /// <param name="skip">How many leading child elements of the root to hide; values of zero or below hide nothing.</param>
    public XmlSkipReader(XmlReader reader, int skip)
    {
        _reader = reader;
        _skip = skip;
    }

    /// <summary>Advances to the next node that should be visible to the consumer.</summary>
    /// <returns>false when the inner reader is exhausted; otherwise true.</returns>
    public override bool Read()
    {
        // Nothing (more) to hide: behave exactly like the inner reader.
        if (_skipped >= _skip) return _reader.Read();

        // Before the root element: pass nodes through and note when the root starts.
        if (_level < 1)
        {
            if (!_reader.Read()) return false;
            if (_reader.NodeType == XmlNodeType.Element) ++_level;
            return true;
        }

        // Inside the root: non-element nodes pass through, while element children
        // are consumed one by one until the requested number has been hidden.
        if (!_reader.Read()) return false;
        while (_skipped < _skip && _reader.NodeType == XmlNodeType.Element)
        {
            if (!SkipCurrentElement()) return false;
            ++_skipped;
            if (!_reader.Read()) return false;
        }
        return true;
    }

    // Consumes the subtree of the element the inner reader is positioned on and
    // leaves the reader on its end tag (or on the element itself if it is empty).
    private bool SkipCurrentElement()
    {
        // <Contact/> has no EndElement node, so there is nothing further to consume.
        if (_reader.IsEmptyElement) return true;

        int open = 1;
        while (open > 0)
        {
            if (!_reader.Read()) return false;
            switch (_reader.NodeType)
            {
                case XmlNodeType.Element:
                    // Empty elements never produce a matching EndElement.
                    if (!_reader.IsEmptyElement) ++open;
                    break;
                case XmlNodeType.EndElement:
                    --open;
                    break;
            }
        }
        return true;
    }
    // rest is just proxy to _reader
}
// Demo: load the sample document through XmlSkipReader with a skip count of 1
// and dump the <Contact> elements found in the resulting XDocument.
void Main()
{
    const int contactsToSkip = 1;

    var source = "<?xml version=\"1.0\"?>" + @"
<Contacts>
<Contact>
<Name>Todd</Name>
<Email>todd@blah.com</Email>
</Contact>
<Contact>
<Name>Sarah</Name>
<Email>sarah@blah.com</Email>
</Contact>
</Contacts>";

    using (var text = new StringReader(source))
    using (var inner = XmlReader.Create(text))
    using (var skipping = new XmlSkipReader(inner, contactsToSkip))
    {
        var document = XDocument.Load(skipping);
        var remaining = document.Descendants("Contact");
        remaining.Dump();
    }
}