从指定的起始位置读取XML文件

时间:2012-11-28 13:27:27

标签: xml c#-4.0 lambda xml-parsing linq-to-xml

我有一个不断被追加的XML文件。我需要重复读取XML中的数据,但在每次传递时我都不想检索我在上一次运行中处理过的数据。

由于我知道文件在处理时有多长,我想我可以使用文件的长度(减去结尾的 </Contacts> 标签)来确定我上次停止的位置。知道这一点后,从文件中的特定字节位置开始检索所有 Contact 元素的最佳方法是什么?

<?xml version="1.0"?>
<Contacts>
    <Contact>
      <Name>Todd</Name>
      <Email>todd@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Sarah</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
</Contacts>

此代码块会抓取所有联系人。我想对它加以限制,使其只读取第一个联系人之后(从字节116处开始)的数据。

// Load the whole file, then project every <Contact> element to its
// concatenated text content (the explicit string conversion of XElement).
XDocument xdoc = XDocument.Load(PATH_TO_FILE);
string[] contact = (from element in xdoc.Descendants("Contact")
                    select (string)element).ToArray();

4 个答案:

答案 0 :(得分:2)

如果您仍想从特定的偏移量开始读取,同时保持在较高的抽象层次上,可以使用下面的XmlTailReader:它把只剩下结束标记的文档尾部内容放到另一个根元素中:

/// <summary>
/// Presents the tail of an XML document (content that starts after some
/// already-processed elements and still ends with the original root's closing
/// tag) as a well-formed document by wrapping it in a synthetic root element.
/// </summary>
/// <remarks>
/// The wrapped reader must be created with <c>ConformanceLevel.Fragment</c>.
/// The stray closing tag of the original root makes such a reader throw an
/// <see cref="XmlException"/>; that exception (or plain end of input) is used
/// as the signal to emit the closing part of the synthetic root. As a
/// consequence any other well-formedness error in the tail also ends the
/// document silently instead of being reported.
/// The synthetic root contains one comment node ("dummy") which is reported
/// to the consumer just before the synthetic end tag.
/// </remarks>
class XmlTailReader : XmlReader
{
    private readonly XmlReader _reader;      // real content (the document tail)
    private readonly XmlReader _fakeReader;  // supplies the synthetic root start/end
    private int _level;                      // open elements, synthetic root included
    enum Fake { Start, Align, None, End };
    private Fake _fake;

    /// <summary>Creates a reader that wraps <paramref name="reader"/> in a root named <paramref name="rootTag"/>.</summary>
    /// <param name="reader">Fragment-level reader positioned before the tail content.</param>
    /// <param name="rootTag">Name of the synthetic root element.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public XmlTailReader(XmlReader reader, string rootTag = "root")
    {
        if (reader == null) throw new ArgumentNullException("reader");

        _reader = reader;
        _fake = Fake.Start;

        var doc = new XmlDocument();
        var root = doc.CreateElement(rootTag);
        doc.AppendChild(root);
        // A child node keeps the element non-empty, so the node reader
        // reports both an Element and an EndElement for the root.
        root.AppendChild(doc.CreateComment("dummy"));
        _fakeReader = new XmlNodeReader(root);
    }

    /// <summary>The reader whose current node is exposed to the consumer.</summary>
    private XmlReader Proxy
    {
        get
        {
            switch (_fake)
            {
            case Fake.Start:
            case Fake.Align:
            case Fake.End:
                return _fakeReader;
            default:
                return _reader;
            }
        }
    }

    /// <summary>
    /// Advances the wrapped reader. Returns false on end of input or when the
    /// reader throws (expected for the stray closing tag of the original root).
    /// </summary>
    private bool TryAdvanceSource()
    {
        try
        {
            return _reader.Read();
        }
        catch (XmlException)
        {
            // Deliberately not filtered by message: the text is localized.
            return false;
        }
    }

    /// <summary>Switches to the closing part of the synthetic root and reads its next node.</summary>
    private bool BeginSyntheticEnd()
    {
        _fake = Fake.End;
        return _fakeReader.Read();
    }

    /// <summary>
    /// Reads the next node: first the synthetic root start tag, then the tail
    /// content beginning at its first element, then the remaining nodes of
    /// the synthetic root.
    /// </summary>
    public override bool Read()
    {
        switch (_fake)
        {
        case Fake.Start:
            if (!_fakeReader.Read()) return false;
            if (_fakeReader.NodeType == XmlNodeType.Element)
            {
                ++_level;
                _fake = Fake.Align;
            }
            return true;

        case Fake.Align:
            _fake = Fake.None;
            // Skip everything before the first element of the tail. A tail
            // without any element (e.g. only the closing root tag) goes
            // straight to the synthetic end instead of throwing.
            do
            {
                if (!TryAdvanceSource()) return BeginSyntheticEnd();
            }
            while (_reader.NodeType != XmlNodeType.Element);
            // Self-closing elements produce no EndElement, so they must not be counted.
            if (!_reader.IsEmptyElement) ++_level;
            return true;

        case Fake.None:
            // End of input also closes the synthetic root so the output stays balanced.
            if (!TryAdvanceSource()) return BeginSyntheticEnd();
            switch (_reader.NodeType)
            {
            case XmlNodeType.Element:
                if (!_reader.IsEmptyElement) ++_level;
                break;
            case XmlNodeType.EndElement:
                // An end tag that closes more than the tail opened belongs to
                // the original root; replace it with the synthetic ending.
                if (--_level == 0) return BeginSyntheticEnd();
                break;
            }
            return true;

        default:
            return _fakeReader.Read();
        }
    }

    public override string Value
    {
        get { return Proxy.Value; }
    }

    public override XmlNodeType NodeType
    {
        get { return Proxy.NodeType; }
    }

    // The remaining abstract members simply forward to the active reader.

    public override int AttributeCount
    {
        get { return Proxy.AttributeCount; }
    }

    public override string BaseURI
    {
        get { return Proxy.BaseURI; }
    }

    public override int Depth
    {
        get { return Proxy.Depth; }
    }

    public override bool EOF
    {
        get { return Proxy.EOF; }
    }

    public override bool IsEmptyElement
    {
        get { return Proxy.IsEmptyElement; }
    }

    public override string LocalName
    {
        get { return Proxy.LocalName; }
    }

    public override string NamespaceURI
    {
        get { return Proxy.NamespaceURI; }
    }

    public override XmlNameTable NameTable
    {
        get { return Proxy.NameTable; }
    }

    public override string Prefix
    {
        get { return Proxy.Prefix; }
    }

    public override ReadState ReadState
    {
        get { return Proxy.ReadState; }
    }

    public override string GetAttribute(int i)
    {
        return Proxy.GetAttribute(i);
    }

    public override string GetAttribute(string name)
    {
        return Proxy.GetAttribute(name);
    }

    public override string GetAttribute(string name, string namespaceURI)
    {
        return Proxy.GetAttribute(name, namespaceURI);
    }

    public override string LookupNamespace(string prefix)
    {
        return Proxy.LookupNamespace(prefix);
    }

    public override bool MoveToAttribute(string name)
    {
        return Proxy.MoveToAttribute(name);
    }

    public override bool MoveToAttribute(string name, string ns)
    {
        return Proxy.MoveToAttribute(name, ns);
    }

    public override bool MoveToElement()
    {
        return Proxy.MoveToElement();
    }

    public override bool MoveToFirstAttribute()
    {
        return Proxy.MoveToFirstAttribute();
    }

    public override bool MoveToNextAttribute()
    {
        return Proxy.MoveToNextAttribute();
    }

    public override bool ReadAttributeValue()
    {
        return Proxy.ReadAttributeValue();
    }

    public override void ResolveEntity()
    {
        Proxy.ResolveEntity();
    }
}

// Demo: drop everything up to and including the first </Contact>, then load
// the remaining tail through XmlTailReader under a synthetic <xxx> root.
void Main()
{
    var document = "<?xml version=\"1.0\"?>" + @"
<Contacts>
    <Contact>
      <Name>Todd</Name>
      <Email>todd@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Sarah</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Peter</Name>
      <Email>peter@blah.com</Email>
  </Contact>
</Contacts>";

    // The tail starts right after the first closing Contact tag.
    const string closingTag = "</Contact>";
    int tailStart = document.IndexOf(closingTag) + closingTag.Length;
    string tail = document.Substring(tailStart);

    // The tail is not a well-formed document, so it is read at fragment level.
    var fragmentSettings = new XmlReaderSettings();
    fragmentSettings.ConformanceLevel = ConformanceLevel.Fragment;

    using (var text = new StringReader(tail))
    using (var fragmentReader = XmlReader.Create(text, fragmentSettings))
    using (var tailReader = new XmlTailReader(fragmentReader, "xxx"))
    {
        XDocument.Load(tailReader).Descendants("Contact").Dump();
    }
}

请注意,要进行这种读取,ConformanceLevel应设置为ConformanceLevel.Fragment。

答案 1 :(得分:1)

我找到了一种通过索引位置保存/检索的方法。这也可以。

// Number of leading <Contact> elements that were already processed.
int position = 1;
// Skip lazily bypasses the first `position` elements and yields the rest,
// which is what the index-based Select/Where/Select chain did by hand.
var contacts = xdoc
    .Descendants("Contact")
    .Skip(position);

答案 2 :(得分:1)

您可以创建一个特殊的Stream,在自定义位置模拟Document的起始元素。代码非常粗糙,但可以工作。

// Demo: resume reading at a known byte offset by prepending a synthetic
// <Contacts> start tag to the remainder of the stream.
void Main()
{
 var xml =
    @"<Contacts><Contact><Name>Todd</Name><Email>todd@blah.com</Email></Contact><Contact>
      <Name>Sarah1</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
  <Contact>
      <Name>Sarah2</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
</Contacts>";

    // Byte offset just past the first </Contact> end tag in the UTF-8 data
    // above, i.e. where the previous run stopped.
    const int resumeOffset = 74;

    using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
    {
        ms.Position = resumeOffset;

        using (var reader = XmlReader.Create(new CustomReader("<Contacts>", ms)))
        {
            var xdoc = XDocument.Load(reader);
            var contact = xdoc.Descendants("Contact").ToArray();

            contact.Dump();
        }
    }
}

/// <summary>
/// Read-only, forward-only stream that first yields the UTF-8 bytes of a
/// fixed text prefix and then the remaining bytes of an underlying stream,
/// starting at that stream's current position.
/// </summary>
/// <remarks>
/// The prefix is always encoded as UTF-8, so the underlying data has to be
/// compatible with that encoding. Disposing this stream disposes the
/// underlying stream as well.
/// </remarks>
public class CustomReader : Stream
{
    private readonly byte[] _prefix;
    private readonly Stream _stream;
    private int _prefixSent; // number of prefix bytes already handed out

    /// <summary>Creates the stream.</summary>
    /// <param name="element">Text (e.g. an opening tag) emitted before the stream content.</param>
    /// <param name="stream">Source of the remaining content; read from its current position.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public CustomReader(string element, Stream stream)
    {
        if (element == null) throw new ArgumentNullException("element");
        if (stream == null) throw new ArgumentNullException("stream");

        _prefix = Encoding.UTF8.GetBytes(element);
        _stream = stream;
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    /// <summary>Releases the underlying stream together with this one.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            _stream.Dispose();
        }
        base.Dispose(disposing);
    }

    /// <summary>Does nothing: a read-only stream has no buffered output.</summary>
    public override void Flush()
    {
    }

    public override long Length
    {
        get { throw new NotSupportedException(); }
    }

    public override long Position
    {
        get { throw new NotSupportedException(); }
        set { throw new NotSupportedException(); }
    }

    /// <summary>
    /// Copies pending prefix bytes first (never more than <paramref name="count"/>),
    /// and once the prefix is exhausted reads from the underlying stream.
    /// </summary>
    /// <returns>Number of bytes written to <paramref name="buffer"/>; 0 at end of stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null) throw new ArgumentNullException("buffer");
        if (offset < 0) throw new ArgumentOutOfRangeException("offset");
        if (count < 0) throw new ArgumentOutOfRangeException("count");
        if (buffer.Length - offset < count)
        {
            throw new ArgumentException("offset and count exceed the buffer length.");
        }
        if (count == 0) return 0;

        int pending = _prefix.Length - _prefixSent;
        if (pending > 0)
        {
            // A short read is allowed by the Stream contract, so the prefix
            // and the underlying data are never mixed in a single call.
            int copied = Math.Min(pending, count);
            Buffer.BlockCopy(_prefix, _prefixSent, buffer, offset, copied);
            _prefixSent += copied;
            return copied;
        }

        return _stream.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }
}

答案 3 :(得分:1)

如果您不想破坏读取XML的一致性。你不能避免用一些第一个元素来构建XDocument

/// <summary>
/// Wraps an <see cref="XmlReader"/> and hides the first <c>skip</c> child
/// elements of the root element, including their whole subtrees. Everything
/// else (prolog, root element, whitespace, remaining children) is passed
/// through unchanged.
/// </summary>
/// <remarks>
/// The first element reported by the wrapped reader is treated as the root.
/// Skipping is delegated to <see cref="XmlReader.Skip"/>, so self-closing
/// children and any number of skipped elements are handled uniformly.
/// </remarks>
class XmlSkipReader : XmlReader
{
    private readonly XmlReader _reader;
    private readonly int _skip;
    private int _skipped;
    private int _childDepth = -1; // depth of the root's direct children; -1 until the root is seen

    /// <summary>Creates the reader.</summary>
    /// <param name="reader">Reader to wrap.</param>
    /// <param name="skip">Number of leading child elements of the root to hide.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="skip"/> is negative.</exception>
    public XmlSkipReader(XmlReader reader, int skip)
    {
        if (reader == null) throw new ArgumentNullException("reader");
        if (skip < 0) throw new ArgumentOutOfRangeException("skip");

        _reader = reader;
        _skip = skip;
    }

    /// <summary>
    /// Reads the next node, silently consuming child elements of the root
    /// until the requested number of elements has been skipped.
    /// </summary>
    public override bool Read()
    {
        if (!_reader.Read()) return false;

        // Quota fulfilled: plain pass-through from here on.
        if (_skipped == _skip) return true;

        if (_childDepth < 0)
        {
            // Still in the prolog; the first element is the root.
            if (_reader.NodeType == XmlNodeType.Element)
            {
                _childDepth = _reader.Depth + 1;
            }
            return true;
        }

        while (_skipped < _skip
            && _reader.NodeType == XmlNodeType.Element
            && _reader.Depth == _childDepth)
        {
            // Skip() leaves the reader on the node that follows the element's
            // end tag (or directly after a self-closing element).
            _reader.Skip();
            ++_skipped;
            if (_reader.ReadState != ReadState.Interactive) return false;
        }
        return true;
    }

    // The remaining abstract members simply forward to the wrapped reader.

    public override XmlNodeType NodeType
    {
        get { return _reader.NodeType; }
    }

    public override string Value
    {
        get { return _reader.Value; }
    }

    public override int AttributeCount
    {
        get { return _reader.AttributeCount; }
    }

    public override string BaseURI
    {
        get { return _reader.BaseURI; }
    }

    public override int Depth
    {
        get { return _reader.Depth; }
    }

    public override bool EOF
    {
        get { return _reader.EOF; }
    }

    public override bool IsEmptyElement
    {
        get { return _reader.IsEmptyElement; }
    }

    public override string LocalName
    {
        get { return _reader.LocalName; }
    }

    public override string NamespaceURI
    {
        get { return _reader.NamespaceURI; }
    }

    public override XmlNameTable NameTable
    {
        get { return _reader.NameTable; }
    }

    public override string Prefix
    {
        get { return _reader.Prefix; }
    }

    public override ReadState ReadState
    {
        get { return _reader.ReadState; }
    }

    public override string GetAttribute(int i)
    {
        return _reader.GetAttribute(i);
    }

    public override string GetAttribute(string name)
    {
        return _reader.GetAttribute(name);
    }

    public override string GetAttribute(string name, string namespaceURI)
    {
        return _reader.GetAttribute(name, namespaceURI);
    }

    public override string LookupNamespace(string prefix)
    {
        return _reader.LookupNamespace(prefix);
    }

    public override bool MoveToAttribute(string name)
    {
        return _reader.MoveToAttribute(name);
    }

    public override bool MoveToAttribute(string name, string ns)
    {
        return _reader.MoveToAttribute(name, ns);
    }

    public override bool MoveToElement()
    {
        return _reader.MoveToElement();
    }

    public override bool MoveToFirstAttribute()
    {
        return _reader.MoveToFirstAttribute();
    }

    public override bool MoveToNextAttribute()
    {
        return _reader.MoveToNextAttribute();
    }

    public override bool ReadAttributeValue()
    {
        return _reader.ReadAttributeValue();
    }

    public override void ResolveEntity()
    {
        _reader.ResolveEntity();
    }
}

// Demo: load the document through XmlSkipReader so that the first <Contact>
// child of the root is left out of the resulting XDocument.
void Main()
{
    var document = "<?xml version=\"1.0\"?>" + @"
<Contacts>
    <Contact>
      <Name>Todd</Name>
      <Email>todd@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Sarah</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
</Contacts>";

    // Number of leading contacts that were already processed.
    const int alreadyProcessed = 1;

    using (var text = new StringReader(document))
    using (var plainReader = XmlReader.Create(text))
    using (var skippingReader = new XmlSkipReader(plainReader, alreadyProcessed))
    {
        XDocument.Load(skippingReader).Descendants("Contact").Dump();
    }
}