如何在.net

时间:2016-10-01 10:30:04

标签: c# .net xml linq out-of-memory

我有一个大的7GB xml文件,我在这里粘贴一小部分

       <?xml version="1.0" encoding="UTF-8"?>
    <bulkFile xmlns:un="utranNrm.xsd"
        xmlns:es="Attributes.15.25.xsd"
        xmlns:xn="generic.xsd" xmlns:gn="geran.xsd" xmlns="configData.xsd">
      <fileHeader fileFormatVersion="32.615 V4.5" Name="Wmart"/>
      <configData dnPrefix="Undefined">
        <xn:SubNetwork id="M_ROOT">
          <xn:VsDataContainer id="1">
            <xn:attributes>
              <xn:vsDataType>vsDataAreas</xn:vsDataType>
              <xn:vsDataFormatVersion>SpecificAttributes</xn:vsDataFormatVersion>
              <es:vsDataAreas/>
            </xn:attributes>
            <xn:VsDataContainer id="424_2_2">
              <xn:attributes>
                <xn:vsDataType>vsDataPlmn</xn:vsDataType>
                <xn:vsDataFormatVersion>SpecificAttributes</xn:vsDataFormatVersion>
                <es:vsDataPlmn>
                  <es:userLabel></es:userLabel>
                  <es:mcc>424</es:mcc>
                  <es:mnc>2</es:mnc>
                  <es:mncLength>2</es:mncLength>
                  <es:aliasPlmnIdentities>
                    <es:mcc>424</es:mcc>
                    <es:mnc>2</es:mnc>
                    <es:mncLength>2</es:mncLength>
                  </es:aliasPlmnIdentities>
                </es:vsDataPlmn>
              </xn:attributes>
              <xn:VsDataContainer id="10">
                <xn:attributes>
                  <xn:vsDataType>vsDataLocationArea</xn:vsDataType>
                  <xn:vsDataFormatVersion>SpecificAttributes</xn:vsDataFormatVersion>
                  <es:vsDataLocationArea>
                    <es:userLabel></es:userLabel>
                    <es:lac>10</es:lac>
                    <es:t3212>10</es:t3212>
                    <es:att>1</es:att>
                  </es:vsDataLocationArea>
                </xn:attributes>
              </xn:VsDataContainer>
            </xn:VsDataContainer>
          </xn:VsDataContainer>
          <xn:SubNetwork id="G-Mum">
            <xn:attributes>
              <xn:userDefinedNetworkType>SiteSolutions</xn:userDefinedNetworkType>
              <xn:userLabel>G-Mum</xn:userLabel>
            </xn:attributes>
            <xn:MeContext id="32509_I_East">
              <xn:VsDataContainer id="23502_I_AAN_UAEU_Crescent_East">
                <xn:attributes>
                  <xn:vsDataType>vsDataMeContext</xn:vsDataType>
                  <xn:vsDataFormatVersion>EricssonSpecificAttributes.15.25</xn:vsDataFormatVersion>
                  <es:vsDataMeContext>
                    <es:userLabel>23502_I_AAN_UAEU_Crescent_East</es:userLabel>
                    <es:ipAddress>10.235.20.10</es:ipAddress>
                    <es:neMIMversion>vF.1.108</es:neMIMversion>
                    <es:lostSynchronisation>SYNCHRONISED</es:lostSynchronisation>
                    <es:bcrLastChange>1474785017888</es:bcrLastChange>
                    <es:bctLastChange>1470699087457</es:bctLastChange>
                    <es:multiStandardRbs6k>false</es:multiStandardRbs6k>
                    <es:mixedModeRadio>false</es:mixedModeRadio>
                    <es:mirrorMIBversion>F.1.100.S.1.6</es:mirrorMIBversion>
                    <es:stnNodes></es:stnNodes>
                  </es:vsDataMeContext>
                </xn:attributes>
              </xn:VsDataContainer>
              <xn:ManagedElement id="1">
                <xn:attributes>
                </xn:attributes>
                <xn:VsDataContainer id="1">
                  <xn:attributes>
                    <xn:vsDataType>vsDataManagedElement</xn:vsDataType>
                    <xn:vsDataFormatVersion>Attributes.15.25.xsd</xn:vsDataFormatVersion>
                    <es:vsDataManagedElement>
                      <es:siteRef>SubNetwork=M_ROOT,Site=32509_I_East</es:siteRef>
                      <es:productType>Node</es:productType>
                      <es:productName>RBS6601L</es:productName>
                      <es:productNumber></es:productNumber>
                      <es:productRevision></es:productRevision>
                      <es:prodDesignation>0</es:prodDesignation>
                    </es:vsDataManagedElement>
                  </xn:attributes>
                </xn:VsDataContainer>
                .
                .
                <xn:VsDataContainer id="1">
                  <xn:attributes>
                    <xn:vsDataType>vsDataENodeBFunction</xn:vsDataType>
                    <xn:vsDataFormatVersion>Attributes.15.25.xsd</xn:vsDataFormatVersion>
                    <es:vsDataENodeBFunction>
                      <es:userLabel></es:userLabel>
                      <es:dscpLabel>24</es:dscpLabel>
                      <es:dnsLookupOnTai>1</es:dnsLookupOnTai>
                      <es:eNBId>32509</es:eNBId>
                    </es:vsDataENodeBFunction>
                  </xn:attributes>
                  <xn:VsDataContainer id="BC_3250_32509_L1_B">
                    <xn:attributes>
                      <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>
                      <es:earfcndl>1651</es:earfcndl>
                      <es:earfcnul>19651</es:earfcnul>
                    </xn:attributes>
                  </xn:VsDataContainer>
                  ..
                  <xn:VsDataContainer id="BD_4250_32509_L1_B">
                    <xn:attributes>
                      <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>
                      <es:earfcndl>1651</es:earfcndl>
                      <es:earfcnul>19651</es:earfcnul>
                    </xn:attributes>
                  </xn:VsDataContainer>

                  <xn:VsDataContainer id="CC_3250_32509_L1_C">
                    <xn:attributes>
                      <xn:vsDataType>testranCellFDD</xn:vsDataType>
                      <es:earfcndl>165</es:earfcndl>
                      <es:earfcnul>19651</es:earfcnul>
                    </xn:attributes>
                  </xn:VsDataContainer>
                </xn:VsDataContainer>
                ..

              </xn:ManagedElement>
            </xn:MeContext>
 <xn:MeContext id="32509_I_west">
          <xn:VsDataContainer id="32509_I_AAN_UAEU_Crescent_East">
            <xn:attributes>
              <xn:vsDataType>vsDataMeContext</xn:vsDataType>
              <xn:vsDataFormatVersion>Attributes.15.25</xn:vsDataFormatVersion>
              <es:vsDataMeContext>
                <es:userLabel>32509_I_AAN_UAEU_Crescent_East</es:userLabel>
                <es:ipAddress>10.235.20.10</es:ipAddress>
                <es:neMIMversion>vF.1.108</es:neMIMversion>
                <es:lostSynchronisation>SYNCHRONISED</es:lostSynchronisation>
                <es:bcrLastChange>1474785017888</es:bcrLastChange>
                <es:bctLastChange>1470699087457</es:bctLastChange>
                <es:multiStandardRbs6k>false</es:multiStandardRbs6k>
                <es:mixedModeRadio>false</es:mixedModeRadio>
                <es:mirrorMIBversion>F.1.100.S.1.6</es:mirrorMIBversion>
                <es:stnNodes></es:stnNodes>
              </es:vsDataMeContext>
            </xn:attributes>
          </xn:VsDataContainer>
          <xn:ManagedElement id="1">
            <xn:attributes>
            </xn:attributes>
            <xn:VsDataContainer id="1">
              <xn:attributes>
                <xn:vsDataType>vsDataManagedElement</xn:vsDataType>
                <xn:vsDataFormatVersion>Attributes.15.25.xsd</xn:vsDataFormatVersion>
                <es:vsDataManagedElement>
                  <es:siteRef>SubNetwork=M_ROOT,Site=32509_I_East</es:siteRef>
                  <es:productType>Node</es:productType>
                  <es:productName>RBS6601L</es:productName>
                  <es:productNumber></es:productNumber>
                  <es:productRevision></es:productRevision>
                  <es:prodDesignation>0</es:prodDesignation>
                </es:vsDataManagedElement>
              </xn:attributes>
            </xn:VsDataContainer>
            .
            .
            <xn:VsDataContainer id="1">
              <xn:attributes>
                <xn:vsDataType>vsDataENodeBFunction</xn:vsDataType>
                <xn:vsDataFormatVersion>Attributes.15.25.xsd</xn:vsDataFormatVersion>
                <es:vsDataENodeBFunction>
                  <es:userLabel></es:userLabel>
                  <es:dscpLabel>24</es:dscpLabel>
                  <es:dnsLookupOnTai>1</es:dnsLookupOnTai>
                  <es:eNBId>32509</es:eNBId>
                </es:vsDataENodeBFunction>
              </xn:attributes>
              <xn:VsDataContainer id="CC_3250_32509_L1_B">
                <xn:attributes>
                  <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>
                  <es:earfcndl>1651</es:earfcndl>
                  <es:earfcnul>19651</es:earfcnul>
                </xn:attributes>
              </xn:VsDataContainer>
              ..
              <xn:VsDataContainer id="CD_4250_32509_L1_B">
                <xn:attributes>
                  <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>
                  <es:earfcndl>1651</es:earfcndl>
                  <es:earfcnul>19651</es:earfcnul>
                </xn:attributes>
              </xn:VsDataContainer>

              <xn:VsDataContainer id="DC_3250_32509_L1_C">
                <xn:attributes>
                  <xn:vsDataType>testranCellFDD</xn:vsDataType>
                  <es:earfcndl>165</es:earfcndl>
                  <es:earfcnul>19651</es:earfcnul>
                </xn:attributes>
              </xn:VsDataContainer>
            </xn:VsDataContainer>
            ..

          </xn:ManagedElement>
        </xn:MeContext>
          </xn:SubNetwork>

        </xn:SubNetwork>
      </configData>
    </bulkFile>

这里我想获得我声明的c#变量的值,如subnetwork、sitename、eNBId、cellname、earfcndl和earfcnul。所有信息都位于根标记之下,所有网站也都位于该根标记下,并且我只粘贴了一个网站的详细信息。逻辑见下文说明;根标记是

<xn:SubNetwork id="M_ROOT">

所以我想得到 subnetwork = <xn:SubNetwork id > attribute value sitename = <xn:MeContext id> attribute value eNBId = <es:eNBId> value, here the important thing is, there will be huge number of `<xn:VsDataContainer id="1">` parent tag,but the filtration is based on `<xn:vsDataType>` inner tag which is of `vsDataENodeBFunction`. cellname = `<xn:VsDataContainer id>` attribute value and `earfcndl` and `earfcnul` is values for `<es:earfcndl> and <es:earfcnul>` tag .Here also filtration is based on <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>

在这里,我写了以下代码,但卡住了,不知道接下来该怎么做。但我可以确定,XmlReader 方法是我们读取这个 7GB XML 文件的唯一方式,其余的方法全部都会抛出"内存不足异常"。期望得到的结果示例:subnetwork = G-Mum, sitename = 32509_I_East, eNBId = 32509, cellname = BC_3250_32509_L1_B, earfcndl = 1651, earfcnul = 19651。我使用的读取方式是

(XElement)XNode.ReadFrom

修改:层次结构中只有一个 xn:SubNetwork id="M_ROOT" 标签,它包含所有区域;这些区域的标签下又包含所有网站的信息,而网站信息中又包含 vsDataEUtranCellFDD 类型下的所有单元格(cell)信息

修改 @jdweng感谢您的帮助。现在他们再次增大了xml,以至于我们用来获取xml片段的 (XElement)XElement.ReadFrom(reader) 本身就会抛出内存不足异常。如何删除不必要的xml部分?我的层次结构是

// Asker's attempt: stream the file at `path` with a forward-only XmlReader and
// materialize only the matching element as an XElement.
using (XmlReader xr = XmlReader.Create(path))
                {
                    xr.MoveToContent();
                    // Looks up the namespace URI bound to the "un" prefix.
                    // NOTE(review): the elements in the posted sample use the "xn" prefix — confirm which namespace is intended.
                    XNamespace un = xr.LookupNamespace("un");
                    while (xr.Read())
                    {
                        // NOTE(review): this compares the element's local name with "M_ROOT"; in the posted sample
                        // "M_ROOT" is the value of the id attribute of xn:SubNetwork, not an element name.
                        // NOTE(review): an inner `while` (rather than `if`) re-tests the condition after ReadFrom — confirm intent.
                        while (xr.NodeType == XmlNodeType.Element && xr.NamespaceURI == un && xr.LocalName == "M_ROOT")
                        {
                            // Loads the current element together with its whole subtree into memory.
                            XElement pin = (XElement)XNode.ReadFrom(xr);
                            // Projects each direct child named "attributes" (in namespace `un`) to an anonymous object.
                            var data = from atts in pin.Elements(un + "attributes")
                                       select new
                                       {
                                           // NOTE(review): `es` is not declared anywhere in this snippet.
                                           eNBId= (string)atts.Element(es + "eNBId"),

                                       }
                            // NOTE(review): the query expression above has no terminating semicolon in the snippet.
                        }
                    }
// NOTE(review): the closing brace of the `using` block is not part of the posted snippet.

所以我在问题开头发布的是必要的数据,其余的都是不必要的

修改 @jdweng,是的,我发布的是必需的,我正在再次编辑我的问题。层次结构(Hierarchy)是

<xn:SubNetwork id="M_ROOT">--------------Root
      <xn:SubNetwork id="G-Mum">---------Region
              <xn:MeContext id="32509_I_East">-----sites
                     <xn:VsDataContainer id="BC_3250_32509_L1_B"> ---Cells
                          <xn:attributes>
                            <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>
                    </xn:MeContext>
                  </xn:SubNetwork>
                </xn:SubNetwork>

然后经过几行:`<xn:SubNetwork>` 是根(Root),并且只有一个。它包含的区域将以组的形式出现,但此处只有 ID 以 G- 开头的 <xn:SubNetwork> 才有效,例如 G-Mum。然后 G-Mum 包含的网站同样以组的形式出现,其中有大量的 <xn:MeContext>;在这里,只有包含 <xn:vsDataType>vsDataENodeBFunction</xn:vsDataType> 的子节点 <xn:VsDataContainer> 才是有效的,用来获取 <es:eNBId>(在它之前还有一个 <xn:vsDataType>vsDataManagedElement</xn:vsDataType> 的容器)。然后再经过几行,子节点 <xn:VsDataContainer> 中包含单元格(cell),单元格位于最内层。因此,只有包含 <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType> 的子节点 <xn:VsDataContainer> 才能用来获取单元格。

修改:在分析了大文件之后,我才知道单元格位于网站之下,而我之前把它贴在了外面,即 vsDataEUtranCellFDD 位于 vsDataENodeBFunction 标记之内。我已经编辑了我发布的xml

EDIT 10/10/16:xml 有少许变化,在包含 eNBId 的 <xn:vsDataType>vsDataENodeBFunction</xn:vsDataType> 之前还有一个类型为 vsDataManagedElement 的 VsDataContainer,因此包含 eNBId 的 VsDataContainer 没有被捕获。我尝试按 <xn:vsDataType>vsDataENodeBFunction</xn:vsDataType> 进行筛选,但是不起作用。

3 个答案:

答案 0 :(得分:1)

你有一个非常大的文件,所以最好使用XmlReader。试试这段代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;

namespace ConsoleApplication16
{
    /// <summary>
    /// Streams a very large bulk-configuration XML file with a forward-only
    /// <see cref="XmlReader"/> and collects, per sub-network and per site (xn:MeContext),
    /// the eNBId and the cells whose vsDataType is vsDataEUtranCellFDD.
    /// Results are stored in the static <c>Network.network</c> object graph.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>States of the parsing loop in <see cref="Main"/>.</summary>
        enum State
        {
            GET_SUBNETWORK,               // position on an xn:SubNetwork and start a new SubNetwork entry
            GET_MECONTEXT,                // position on an xn:MeContext and start a new site
            GET_CONTAINERS,               // load sibling elements of the site one at a time
            GET_SUBNETWORK_OR_MECONTEXT   // after an end tag: decide whether a site or a sub-network follows
        }

        /// <summary>
        /// Parses <see cref="FILENAME"/> and fills <c>Network.network</c>.
        /// Only one xn:VsDataContainer subtree is materialized as an <see cref="XElement"/>
        /// at a time, so memory use is bounded by the largest such subtree.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            XmlReaderSettings settings = new XmlReaderSettings() { IgnoreWhitespace = true };

            // The reader owns the underlying file stream; dispose it even when parsing throws.
            using (XmlReader reader = XmlReader.Create(FILENAME, settings))
            {
                // The first xn:SubNetwork is treated as the network root.
                reader.ReadToFollowing("SubNetwork", "generic.xsd");
                Network.network.name = reader.GetAttribute("id");
                Network.network.subnetworks = new List<SubNetwork>();

                // Advance to the first nested xn:SubNetwork before entering the state machine.
                string xnNameSpace = reader.LookupNamespace("xn");
                reader.ReadToFollowing("SubNetwork", xnNameSpace);

                Sites newSite = null;
                SubNetwork subNetWork = null;
                Boolean endElement = false;

                State state = State.GET_SUBNETWORK;
                while (!reader.EOF)
                {
                    switch (state)
                    {
                        case State.GET_SUBNETWORK:
                            if (reader.Name != "xn:SubNetwork")
                            {
                                reader.ReadToFollowing("SubNetwork", xnNameSpace);
                            }
                            if (!reader.EOF)
                            {
                                subNetWork = new SubNetwork();
                                Network.network.subnetworks.Add(subNetWork);
                                subNetWork.name = reader.GetAttribute("id");
                                subNetWork.sites = new List<Sites>();

                                state = State.GET_MECONTEXT;
                            }
                            break;

                        case State.GET_MECONTEXT:
                            if (reader.Name != "xn:MeContext")
                            {
                                if (reader.NodeType == XmlNodeType.EndElement)
                                {
                                    endElement = true;
                                    state = State.GET_SUBNETWORK;
                                }
                                else
                                {
                                    endElement = false;
                                    reader.ReadToFollowing("MeContext", xnNameSpace);
                                }
                            }
                            if (!reader.EOF && !endElement)
                            {
                                state = State.GET_CONTAINERS;
                                newSite = new Sites();
                                subNetWork.sites.Add(newSite);
                                newSite.sitename = reader.GetAttribute("id");
                                // Skip ahead to the first container inside the site's xn:ManagedElement.
                                reader.ReadToFollowing("ManagedElement", xnNameSpace);
                                reader.ReadToFollowing("VsDataContainer", xnNameSpace);
                            }
                            break;

                        case State.GET_CONTAINERS:

                            if (!reader.EOF)
                            {
                                switch (reader.NodeType)
                                {
                                    case XmlNodeType.EndElement:
                                        state = State.GET_SUBNETWORK_OR_MECONTEXT;
                                        break;
                                    case XmlNodeType.Text:
                                        // Value is not needed; the call only moves the reader past the text.
                                        reader.ReadContentAsString();
                                        break;

                                    case XmlNodeType.Element:

                                        string elementName = reader.Name;
                                        // Materializes this element and its subtree, and moves the reader past it.
                                        XElement element = (XElement)XElement.ReadFrom(reader);
                                        if (elementName == "xn:VsDataContainer")
                                        {
                                            XElement vsDataENodeBFunction = element.Descendants().Where(a => a.Name.LocalName == "vsDataENodeBFunction").FirstOrDefault();
                                            if (vsDataENodeBFunction != null)
                                            {
                                                XElement eNBId = element.Descendants().Where(a => a.Name.LocalName == "eNBId").FirstOrDefault();
                                                if (eNBId != null)
                                                {
                                                    newSite.eNBId = (int)eNBId;
                                                }
                                            }
                                            // Nested containers whose vsDataType text is vsDataEUtranCellFDD are the cells.
                                            List<XElement> vsDataContainers = element.Descendants().Where(a => a.Name.LocalName == "VsDataContainer").ToList();
                                            List<XElement> vsDataEUtranCellFDD = vsDataContainers.Where(x => x.Descendants().Where(a => (a.Name.LocalName == "vsDataType") && ((string)a == "vsDataEUtranCellFDD")).Any()).ToList();
                                            if (vsDataEUtranCellFDD.Any())
                                            {
                                                // NOTE(review): the (int) conversions throw when earfcndl/earfcnul
                                                // is absent from a cell container — confirm both are always present.
                                                List<CellName> cells = vsDataEUtranCellFDD.Select(x => new CellName()
                                                {
                                                    id = (string)x.Attribute("id"),
                                                    earfcndl = (int)x.Descendants().Where(a => a.Name.LocalName == "earfcndl").FirstOrDefault(),
                                                    earfcnul = (int)x.Descendants().Where(a => a.Name.LocalName == "earfcnul").FirstOrDefault()
                                                }).ToList();
                                                if (newSite.cellName == null)
                                                {
                                                    newSite.cellName = new List<CellName>();
                                                }
                                                newSite.cellName.AddRange(cells);
                                            }
                                        }
                                        break;

                                    default:
                                        Console.WriteLine("Unexpected Results");
                                        reader.ReadContentAsString();
                                        Console.ReadLine();
                                        break;
                                }
                            }
                            break;

                        case State.GET_SUBNETWORK_OR_MECONTEXT:
                            switch (reader.NodeType)
                            {
                                case XmlNodeType.Element:
                                    switch (reader.Name)
                                    {
                                        case "xn:MeContext":
                                            state = State.GET_MECONTEXT;
                                            break;
                                        case "xn:SubNetwork":
                                            state = State.GET_SUBNETWORK;
                                            break;
                                        default:
                                            // Step into any other element and keep scanning.
                                            reader.ReadStartElement();
                                            break;
                                    }
                                    break;

                                case XmlNodeType.Text:
                                    // Value is not needed; the call only moves the reader past the text.
                                    reader.ReadContentAsString();
                                    break;

                                case XmlNodeType.EndElement:
                                    reader.ReadEndElement();
                                    break;

                                default:
                                    Console.WriteLine("Unexpected Results");
                                    reader.ReadContentAsString();
                                    Console.ReadLine();
                                    break;

                            }
                            break;
                    }
                }
            }
        }

    }
    /// <summary>
    /// Top-level result object: the root network and the sub-networks found beneath it.
    /// </summary>
    public class Network
    {
        /// <summary>Shared instance that the parser fills in.</summary>
        public static Network network = new Network();

        /// <summary>Value of the <c>id</c> attribute read for the root network.</summary>
        public string name { get; set; }

        /// <summary>Sub-networks collected for this network; not initialised by this class.</summary>
        public List<SubNetwork> subnetworks { get; set; }
    }
    /// <summary>
    /// One sub-network (region) and the sites that belong to it.
    /// </summary>
    public class SubNetwork
    {
        /// <summary>Value of the sub-network's <c>id</c> attribute.</summary>
        public string name { get; set; }

        /// <summary>Sites collected for this sub-network; not initialised by this class.</summary>
        public List<Sites> sites { get; set; }
    }
    /// <summary>
    /// One site: its name, its eNBId and the cells found for it.
    /// </summary>
    public class Sites
    {
        /// <summary>
        /// Creates a site with an empty cell list, so that a site for which no
        /// cells were found can still be enumerated without a null check.
        /// </summary>
        public Sites()
        {
            cellName = new List<CellName>();
        }

        /// <summary>Value of the site's <c>id</c> attribute.</summary>
        public string sitename { get; set; }

        /// <summary>Numeric eNBId of the site; stays 0 when none was assigned.</summary>
        public int eNBId { get; set; }

        /// <summary>Cells that belong to this site; empty until cells are added.</summary>
        public List<CellName> cellName { get; set; }
    }
    /// <summary>
    /// One cell: its container id and its two EARFCN values.
    /// </summary>
    public class CellName
    {
        /// <summary>Value of the cell container's <c>id</c> attribute.</summary>
        public string id { get; set; }

        /// <summary>Integer value taken from the cell's <c>earfcndl</c> element.</summary>
        public int earfcndl { get; set; }

        /// <summary>Integer value taken from the cell's <c>earfcnul</c> element.</summary>
        public int earfcnul { get; set; }
    }
}

答案 1 :(得分:1)

使用以下代码获取xml的一小部分

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;

namespace ConsoleApplication17
{
    /// <summary>
    /// Copies a small excerpt of a huge XML file into a text file so that it can be
    /// inspected: output starts at the first line containing the G-Mum sub-network
    /// tag and stops after <see cref="COPY_LINES"/> lines or at end of file.
    /// </summary>
    class Program
    {
        const string inputFilename = @"c:\temp\test.xml";
        const string outputFilename = @"c:\temp\test.txt";
        const int COPY_LINES = 10000;

        /// <summary>Reads the input line by line and writes the excerpt.</summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            // Both streams are disposed (and the writer flushed) even if copying throws.
            using (StreamReader reader = new StreamReader(inputFilename, Encoding.UTF8))
            using (StreamWriter writer = new StreamWriter(outputFilename, false, Encoding.UTF8))
            {
                Boolean start = false;
                int i = 0;
                while (i < COPY_LINES)
                {
                    string inputLine = reader.ReadLine();
                    if (inputLine == null)
                    {
                        // End of file: the marker was never found, or fewer than
                        // COPY_LINES lines follow it. Stop instead of dereferencing null.
                        break;
                    }
                    if (inputLine.Contains("xn:SubNetwork id=\"G-Mum\""))
                    {
                        start = true;
                    }
                    if (start)
                    {
                        writer.WriteLine(inputLine);
                        i++;
                    }
                }
            }
        }
    }
}

答案 2 :(得分:1)

下面的代码应该可以解决earfcndl和earfcnul的问题
 // Builds one CellName per vsDataEUtranCellFDD container without throwing when
 // the wrapper element or one of the two values is missing.
 List<CellName> cells = new List<CellName>();
 foreach (XElement xelement in vsDataEUtranCellFDD)
 {
     CellName newCell = new CellName();
     newCell.id = (string)xelement.Attribute("id");

     // Prefer the direct children of the vsDataEUtranCellFDD element; when the
     // container has no such element, search all of the container's descendants.
     XElement attribute = xelement.Descendants().Where(a => a.Name.LocalName == "vsDataEUtranCellFDD").FirstOrDefault();
     IEnumerable<XElement> candidates = attribute != null ? attribute.Elements() : xelement.Descendants();

     // The (int?) conversion yields null for a missing element, where (int) would throw;
     // a missing value is stored as 0 because the properties are plain ints.
     newCell.earfcndl = ((int?)candidates.Where(a => a.Name.LocalName == "earfcndl").FirstOrDefault()) ?? 0;
     newCell.earfcnul = ((int?)candidates.Where(a => a.Name.LocalName == "earfcnul").FirstOrDefault()) ?? 0;
     cells.Add(newCell);
 }
}