C#Xml序列化为一个类...我是否需要为给定XML文件中的所有内容创建一个对象\类?

时间:2016-07-22 17:44:04

标签: c# xml parsing serialization

我正在做一个学校项目,需要解析一个复杂程度不固定的XML文件。我只知道自己感兴趣的各种元素及其属性。但是,这些值并不总是存在,因此必须进行空值(NULL)检查。在我的调研过程中,大多数人似乎都认为,在处理复杂的XML文件时,最好将文件反序列化为预定义的类。我将提供此XML文件的两个示例供您参考,并说明我感兴趣的元素和属性。我希望有人能提供一个例子,说明如何解析这个文件、提取属性值、编辑它们,然后将它们写回同一个文件。我还会附上我到目前为止的代码......

示例XML文件(1):

SampleXML1 - Simple XML File to be parsed

示例XML文件(2):

SampleXML2 - Not so simple XML file to be parsed

我感兴趣的元素是具有以下任一属性的任何元素:

  • w:rsidR
  • w:rsidRDefault
  • w:rsidP
  • w:rsidRPr
  • w:rsidTr

我目前有以下方法解析文件并读入属性值,并允许我在内存中编辑它们。但是,我无法弄清楚如何最好地将这个相同的数据结构写回属性。因此,我的研究和随后的关于XML序列化的问题。我一如既往地感谢您的帮助和投入......谢谢!

我的代码段:

/// <summary>
/// Scans the XML file at the hard-coded path for <c>w:p</c>, <c>w:r</c>, <c>w:tr</c> and
/// <c>w:sectPr</c> start elements, reads their rsid attribute values, overlays characters
/// from <paramref name="strMsg"/> onto each value (starting at the third character) and
/// prints every resulting value followed by the total number of rsid attributes found.
/// Nothing is written back to the file; the edited values exist only in memory.
/// </summary>
/// <param name="strMsg">
/// Characters consumed sequentially, across all rsid values, to overwrite positions 2 and up
/// of each value. When the message runs out, remaining values are printed unchanged.
/// A null message is treated as empty.
/// </param>
public static void shaqfu(string strMsg)
    {
        string strFile = @"C:\SourceFolder\SampleXML\document-test.xml";
        // Alternative sample inputs:
        //string strFile = @"C:\SourceFolder\SampleXML\document (2).xml";
        //string strFile = @"C:\SourceFolder\SampleXML\document (3).xml";

        // Attributes are probed in this fixed order for every element of interest, so the
        // order in which message characters are consumed is stable from run to run.
        string[] rsidAttributeNames = { "w:rsidR", "w:rsidRPr", "w:rsidRDefault", "w:rsidP", "w:rsidTr" };

        // Guard against a null message so the overlay loop cannot dereference null.
        string message = strMsg ?? string.Empty;

        int rsidCount = 0;   // number of rsid attributes found in the whole document
        int messagePos = 0;  // next unread character of the message

        using (XmlReader reader = XmlReader.Create(strFile))
        {
            while (reader.Read())
            {
                if (!reader.IsStartElement())
                {
                    continue;
                }

                // NOTE: matching is done on the prefixed name, so it relies on the document
                // binding the WordprocessingML namespace to the "w" prefix.
                switch (reader.Name)
                {
                    case "w:p":
                    case "w:r":
                    case "w:tr":
                    case "w:sectPr":
                        break;

                    default:
                        // Not an element that carries rsid attributes; move to the next node.
                        continue;
                }

                foreach (string attributeName in rsidAttributeNames)
                {
                    // GetAttribute returns null when the attribute is absent.
                    string rsid = reader.GetAttribute(attributeName);
                    if (rsid == null)
                    {
                        continue;
                    }

                    rsidCount++;

                    // Keep the first two characters and overwrite the rest while message
                    // characters remain.
                    char[] rsidChars = rsid.ToCharArray();
                    for (int x = 2; x < rsidChars.Length && messagePos < message.Length; x++)
                    {
                        rsidChars[x] = message[messagePos++];
                    }

                    Console.WriteLine(rsidChars);
                }
            }
        }

        Console.WriteLine("Number of rsids found : {0}", rsidCount);
    }

示例XML文件(1) - 实际文本

<?xml version="1.0" encoding="UTF-16" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 wp14">
<w:body>
    <w:p w14:paraId="2CBBB1B4" w14:textId="77777777" w:rsidR="00D9548A" w:rsidRDefault="00D9548A" w:rsidP="00ED7A0B"></w:p>
    <w:p w14:paraId="2CBBB1B5" w14:textId="77777777" w:rsidR="00D9548A" w:rsidRPr="00ED77B9" w:rsidRDefault="00C706DD" w:rsidP="00D9548A"></w:p>
    <w:pPr>
        <w:rPr>
            <w:rFonts w:ascii="Arial" w:hAnsi="Arial" w:cs="Arial"></w:rFonts>
            <w:b></w:b>
            <w:sz w:val="40"></w:sz>
            <w:szCs w:val="40"></w:szCs>
        </w:rPr>
    </w:pPr>
    <w:r w:rsidRPr="00EC456F"></w:r>
    <w:tr w:rsidR="0029258E" w14:paraId="2CBBB242" w14:textId="77777777" w:rsidTr="0029258E"></w:tr>
</w:body>

示例XML文件(2) - 实际文本:

<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 wp14">
<w:body>
    <w:p w:rsidR="00661DE2" w:rsidRDefault="00B31FC7">
        <w:r>
            <w:t>This is a single editing session. 9:49AM</w:t>
        </w:r>
        <w:r w:rsidR="00251096">
            <w:t xml:space="preserve"> – adding more content to the first line 10:46AM</w:t>
        </w:r>
        <w:r w:rsidR="00A06ADC">
            <w:t xml:space="preserve"> – adding some more content to the original sentence. 10:49AM</w:t>
        </w:r>
        <w:bookmarkStart w:id="0" w:name="_GoBack"></w:bookmarkStart>
        <w:bookmarkEnd w:id="0"></w:bookmarkEnd>
    </w:p>
    <w:p w:rsidR="00481AA7" w:rsidRDefault="00481AA7">
        <w:r>
            <w:t>This is a second editing session. 9:56AM</w:t>
        </w:r>
    </w:p>
    <w:p w:rsidR="005C6856" w:rsidRDefault="005C6856">
        <w:r>
            <w:t>This is a third editing session. 9:58AM</w:t>
        </w:r>
    </w:p>
    <w:sectPr w:rsidR="005C6856">
        <w:pgSz w:w="12240" w:h="15840"></w:pgSz>
        <w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="720" w:footer="720" w:gutter="0"></w:pgMar>
        <w:cols w:space="720"></w:cols>
        <w:docGrid w:linePitch="360"></w:docGrid>
    </w:sectPr>
</w:body>

1 个答案:

答案 0 :(得分:1)

尝试以下代码。我组合使用了XmlReader和XML Linq。由于文件较大,您需要使用XmlReader。在XML Linq部分,我使用了多种写法来展示解析XML的各种方法。

我遇到了一些需要时间来解决的问题:

  1. 你的文件声明了 encoding="UTF-16",所以我使用StreamReader并跳过第一行,以便XmlReader能够正常工作。
  2. 你有很多不同的命名空间,所以我忽略了所有这些命名空间,改用Name.LocalName,即不带命名空间的标记(或属性)名称。
  3. 您的属性带有命名空间前缀,这种用法并不常见。
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Xml;
    using System.Xml.Linq;
    using System.IO;
    
    namespace ConsoleApplication4
    {
        /// <summary>
        /// Demonstrates reading the children of the <c>body</c> element of a WordprocessingML
        /// document one at a time with <see cref="XmlReader"/>, materializing each child as an
        /// <see cref="XElement"/> and pulling attribute values out with LINQ to XML.
        /// All lookups use local names only, so namespace prefixes are ignored.
        /// </summary>
        class Program
        {
            const string FILENAME = @"c:\temp\test2.xml";

            /// <summary>
            /// Opens <see cref="FILENAME"/>, skips its first line, positions the reader inside
            /// <c>body</c> and extracts the known attributes of each child element into locals.
            /// The extracted values are not used further; this is a parsing demonstration.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                // Both readers are disposed so the file handle is released even on failure.
                using (StreamReader sReader = new StreamReader(FILENAME))
                {
                    //read line to remove xml identification which may have "UTF-16"
                    sReader.ReadLine();

                    using (XmlReader reader = XmlReader.Create(sReader))
                    {
                        reader.Read();

                        // The namespace of the first node read is reused to locate "body".
                        string ns = reader.NamespaceURI;
                        reader.ReadToFollowing("body", ns);
                        reader.ReadStartElement("body", ns);

                        while (!reader.EOF && (reader.NodeType != XmlNodeType.EndElement))
                        {
                            // Skip whitespace, comments and any other non-element node so that
                            // only elements are handed to XNode.ReadFrom and cast to XElement.
                            if (reader.NodeType != XmlNodeType.Element)
                            {
                                reader.Read();
                                continue;
                            }

                            // ReadFrom consumes the whole element and leaves the reader on the
                            // node that follows it.
                            XElement node = (XElement)XNode.ReadFrom(reader);
                            switch (node.Name.LocalName)
                            {
                                case "p":
                                    // A (string) cast of a missing attribute yields null.
                                    string paraId = (string)Attr(node, "paraId");
                                    string textId = (string)Attr(node, "textId");
                                    string rsidR = (string)Attr(node, "rsidR");
                                    string rsidRDefault = (string)Attr(node, "rsidRDefault");
                                    string rsidP = (string)Attr(node, "rsidP");

                                    var rS = node.Descendants().Where(run => run.Name.LocalName == "r").Select(run => new
                                    {
                                        rsidR = (string)Attr(run, "rsidR"),
                                        rsidRDefault = (string)Attr(run, "rsidRDefault"),
                                        t = (string)FirstDescendant(run, "t")
                                    }).ToList();

                                    // Numeric attributes use (int?) so a missing attribute gives
                                    // null instead of throwing.
                                    var bookMarkStart = node.Descendants().Where(mark => mark.Name.LocalName == "bookmarkStart").Select(mark => new
                                    {
                                        id = (int?)Attr(mark, "id"),
                                        name = (string)Attr(mark, "name")
                                    }).FirstOrDefault();

                                    var bookMarkEnd = node.Descendants().Where(mark => mark.Name.LocalName == "bookmarkEnd").Select(mark => new
                                    {
                                        id = (int?)Attr(mark, "id"),
                                        name = (string)Attr(mark, "name")
                                    }).FirstOrDefault();

                                    break;

                                case "pPr":
                                    XElement rFonts = FirstDescendant(node, "rFonts");
                                    if (rFonts != null)
                                    {
                                        string ascii = (string)Attr(rFonts, "ascii");
                                        string hAnsi = (string)Attr(rFonts, "hAnsi");
                                        string cs = (string)Attr(rFonts, "cs");
                                    }
                                    XElement b = FirstDescendant(node, "b");
                                    if (b != null)
                                    {
                                        string bVal = (string)Attr(b, "val");
                                    }
                                    XElement sz = FirstDescendant(node, "sz");
                                    if (sz != null)
                                    {
                                        int? szVal = (int?)Attr(sz, "val");
                                    }
                                    XElement szCs = FirstDescendant(node, "szCs");
                                    if (szCs != null)
                                    {
                                        int? szCsVal = (int?)Attr(szCs, "val");
                                    }
                                    break;

                                case "r":
                                    string rsidRPr = (string)Attr(node, "rsidRPr");
                                    break;

                                case "sectPr":
                                    string sectRsidR = (string)Attr(node, "rsidR");
                                    var pgSz = node.Descendants().Where(size => size.Name.LocalName == "pgSz").Select(size => new
                                    {
                                        w = (int?)Attr(size, "w"),
                                        h = (int?)Attr(size, "h")
                                    }).FirstOrDefault();

                                    var pgMar = node.Descendants().Where(margin => margin.Name.LocalName == "pgMar").Select(margin => new
                                    {
                                        top = (int?)Attr(margin, "top"),
                                        right = (int?)Attr(margin, "right"),
                                        bottom = (int?)Attr(margin, "bottom"),
                                        left = (int?)Attr(margin, "left"),
                                        header = (int?)Attr(margin, "header"),
                                        footer = (int?)Attr(margin, "footer"),
                                        gutter = (int?)Attr(margin, "gutter")
                                    }).FirstOrDefault();

                                    var cols = node.Descendants().Where(col => col.Name.LocalName == "cols").Select(col => new
                                    {
                                        space = (int?)Attr(col, "space")
                                    }).FirstOrDefault();

                                    var docGrid = node.Descendants().Where(grid => grid.Name.LocalName == "docGrid").Select(grid => new
                                    {
                                        linePitch = (int?)Attr(grid, "linePitch")
                                    }).FirstOrDefault();

                                    break;

                                case "tr":
                                    string trRsidR = (string)Attr(node, "rsidR");
                                    string trParaId = (string)Attr(node, "paraId");
                                    string trTextId = (string)Attr(node, "textId");
                                    string tRrsidTr = (string)Attr(node, "rsidTr");
                                    break;

                                default:
                                    //should not get here
                                    break;
                            }
                        }
                    }
                }
            }

            /// <summary>
            /// Returns the first attribute of <paramref name="element"/> whose local name equals
            /// <paramref name="localName"/>, ignoring its namespace, or null when there is none.
            /// </summary>
            private static XAttribute Attr(XElement element, string localName)
            {
                return element.Attributes().FirstOrDefault(a => a.Name.LocalName == localName);
            }

            /// <summary>
            /// Returns the first descendant of <paramref name="element"/> whose local name equals
            /// <paramref name="localName"/>, ignoring its namespace, or null when there is none.
            /// </summary>
            private static XElement FirstDescendant(XElement element, string localName)
            {
                return element.Descendants().FirstOrDefault(d => d.Name.LocalName == localName);
            }
        }
    }