我正在做一个学校项目,需要解析一个复杂程度可能变化的XML文件。我已经知道自己关心的各种元素及其属性。但是,这些属性值可能并不总是存在,因此必须进行NULL检查。在我的研究过程中,似乎大多数人都认为,在处理复杂的XML文件时,最好将文件反序列化为预定义的类。我将提供此XML文件的两个示例供您参考,并说明我关心的元素和属性。我希望有人能给出一个例子,说明如何解析这个文件以提取属性值,对其进行编辑,然后将它们写回同一个文件。我还会附上我到目前为止的代码......
示例XML文件(1):
示例XML文件(2):
我感兴趣的元素是具有以下属性的任何元素:
我目前有以下方法解析文件并读入属性值,并允许我在内存中编辑它们。但是,我无法弄清楚如何最好地将这个相同的数据结构写回属性。因此,我的研究和随后的关于XML序列化的问题。我一如既往地感谢您的帮助和投入......谢谢!
我的代码段:
/// <summary>
/// Scans a hard-coded XML file for rsid attributes on <c>w:p</c>, <c>w:r</c>,
/// <c>w:tr</c> and <c>w:sectPr</c> start tags. For every attribute value found, the
/// characters from the third position onward are overwritten in memory with the next
/// unused characters of <paramref name="strMsg"/>, and the result is printed to the
/// console. The file itself is not modified. Finally the number of rsid values found
/// is printed.
/// </summary>
/// <param name="strMsg">
/// Text whose characters are consumed, in order, across all rsid values found. Once it
/// is exhausted, remaining values are printed unchanged.
/// </param>
public static void shaqfu(string strMsg)
{
    string strFile = @"C:\SourceFolder\SampleXML\document-test.xml";
    //string strFile = @"C:\SourceFolder\SampleXML\document (2).xml";
    //string strFile = @"C:\SourceFolder\SampleXML\document (3).xml";

    // Attribute names listed in the order their values are processed and printed
    // for each matching element.
    string[] rsidAttributeNames = { "w:rsidR", "w:rsidRPr", "w:rsidRDefault", "w:rsidP", "w:rsidTr" };

    int index = 0; // number of rsid attribute values found so far
    int i = 0;     // position of the next unused character in strMsg

    using (XmlReader reader = XmlReader.Create(strFile))
    {
        while (reader.Read())
        {
            if (!reader.IsStartElement())
            {
                continue;
            }

            // Only these four elements are inspected. The match is on the prefixed
            // name, so it relies on the document using the "w" prefix.
            switch (reader.Name)
            {
                case "w:p":
                case "w:r":
                case "w:tr":
                case "w:sectPr":
                    break;
                default:
                    continue;
            }

            foreach (string attributeName in rsidAttributeNames)
            {
                string rsid = reader.GetAttribute(attributeName);
                if (rsid == null)
                {
                    // Attribute is not present on this element.
                    continue;
                }

                index++;

                // Keep the first two characters; overwrite the rest while message
                // characters remain.
                char[] rValCharArray = rsid.ToCharArray();
                for (int x = 2; x < rValCharArray.Length && i < strMsg.Length; x++)
                {
                    rValCharArray[x] = strMsg[i++];
                }
                Console.WriteLine(rValCharArray);
            }
        }
    }

    Console.WriteLine("Number of rsids found : {0}",index);
}
示例XML文件(1) - 实际文本
<?xml version="1.0" encoding="UTF-16" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 wp14">
<w:body>
<w:p w14:paraId="2CBBB1B4" w14:textId="77777777" w:rsidR="00D9548A" w:rsidRDefault="00D9548A" w:rsidP="00ED7A0B"></w:p>
<w:p w14:paraId="2CBBB1B5" w14:textId="77777777" w:rsidR="00D9548A" w:rsidRPr="00ED77B9" w:rsidRDefault="00C706DD" w:rsidP="00D9548A"></w:p>
<w:pPr>
<w:rPr>
<w:rFonts w:ascii="Arial" w:hAnsi="Arial" w:cs="Arial"></w:rFonts>
<w:b></w:b>
<w:sz w:val="40"></w:sz>
<w:szCs w:val="40"></w:szCs>
</w:rPr>
</w:pPr>
<w:r w:rsidRPr="00EC456F"></w:r>
<w:tr w:rsidR="0029258E" w14:paraId="2CBBB242" w14:textId="77777777" w:rsidTr="0029258E"></w:tr>
</w:body>
示例XML文件(2) - 实际文本:
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 wp14">
<w:body>
<w:p w:rsidR="00661DE2" w:rsidRDefault="00B31FC7">
<w:r>
<w:t>This is a single editing session. 9:49AM</w:t>
</w:r>
<w:r w:rsidR="00251096">
<w:t xml:space="preserve"> – adding more content to the first line 10:46AM</w:t>
</w:r>
<w:r w:rsidR="00A06ADC">
<w:t xml:space="preserve"> – adding some more content to the original sentence. 10:49AM</w:t>
</w:r>
<w:bookmarkStart w:id="0" w:name="_GoBack"></w:bookmarkStart>
<w:bookmarkEnd w:id="0"></w:bookmarkEnd>
</w:p>
<w:p w:rsidR="00481AA7" w:rsidRDefault="00481AA7">
<w:r>
<w:t>This is a second editing session. 9:56AM</w:t>
</w:r>
</w:p>
<w:p w:rsidR="005C6856" w:rsidRDefault="005C6856">
<w:r>
<w:t>This is a third editing session. 9:58AM</w:t>
</w:r>
</w:p>
<w:sectPr w:rsidR="005C6856">
<w:pgSz w:w="12240" w:h="15840"></w:pgSz>
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="720" w:footer="720" w:gutter="0"></w:pgMar>
<w:cols w:space="720"></w:cols>
<w:docGrid w:linePitch="360"></w:docGrid>
</w:sectPr>
</w:body>
答案 0 :(得分:1)
尝试以下代码。我结合使用了XmlReader和XML Linq。由于文件较大,您需要使用XmlReader。在XML Linq部分,我使用了多种技术来展示解析XML的各种方法。
我遇到了一些需要时间来解决的问题:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.IO;
namespace ConsoleApplication4
{
    /// <summary>
    /// Demonstrates reading a large WordprocessingML document by streaming the children
    /// of the body element with <see cref="XmlReader"/> and loading each child into an
    /// <see cref="XElement"/> so it can be queried with LINQ to XML.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test2.xml";

        /// <summary>
        /// Reads <see cref="FILENAME"/>, walks every direct child element of the body
        /// element and extracts the attributes of interest into local variables.
        /// Attributes that are absent yield <c>null</c> rather than throwing.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            using (StreamReader sReader = new StreamReader(FILENAME))
            {
                //read line to remove xml identification which may have "UTF-16"
                sReader.ReadLine();

                using (XmlReader reader = XmlReader.Create(sReader))
                {
                    // Skip anything before the root element (whitespace, comments, ...)
                    // so the namespace is taken from the root element itself.
                    reader.MoveToContent();
                    string ns = reader.NamespaceURI;

                    if (!reader.ReadToFollowing("body", ns))
                    {
                        // No body element: nothing to parse.
                        return;
                    }
                    reader.ReadStartElement("body", ns);

                    // Loop until the end tag of body (or end of input) is reached.
                    while (!reader.EOF && (reader.NodeType != XmlNodeType.EndElement))
                    {
                        if (reader.NodeType != XmlNodeType.Element)
                        {
                            // Whitespace, text, comments, etc. between elements.
                            reader.Read();
                            continue;
                        }

                        // ReadFrom consumes the whole element and leaves the reader
                        // positioned on the node that follows it.
                        XElement node = (XElement)XNode.ReadFrom(reader);

                        switch (node.Name.LocalName)
                        {
                            case "p":
                                string paraId = (string)Attr(node, "paraId");
                                string textId = (string)Attr(node, "textId");
                                string rsidR = (string)Attr(node, "rsidR");
                                string rsidRDefault = (string)Attr(node, "rsidRDefault");
                                string rsidP = (string)Attr(node, "rsidP");

                                var rS = node.Descendants()
                                    .Where(x => x.Name.LocalName == "r")
                                    .Select(x => new
                                    {
                                        rsidR = (string)Attr(x, "rsidR"),
                                        rsidRDefault = (string)Attr(x, "rsidRDefault"),
                                        t = (string)Descendant(x, "t")
                                    })
                                    .ToList();

                                var bookMarkStart = node.Descendants()
                                    .Where(x => x.Name.LocalName == "bookmarkStart")
                                    .Select(x => new
                                    {
                                        id = (int?)Attr(x, "id"),
                                        name = (string)Attr(x, "name")
                                    })
                                    .FirstOrDefault();

                                var bookMarkEnd = node.Descendants()
                                    .Where(x => x.Name.LocalName == "bookmarkEnd")
                                    .Select(x => new
                                    {
                                        id = (int?)Attr(x, "id"),
                                        name = (string)Attr(x, "name")
                                    })
                                    .FirstOrDefault();
                                break;

                            case "pPr":
                                XElement rFonts = Descendant(node, "rFonts");
                                if (rFonts != null)
                                {
                                    string ascii = (string)Attr(rFonts, "ascii");
                                    string hAnsi = (string)Attr(rFonts, "hAnsi");
                                    string cs = (string)Attr(rFonts, "cs");
                                }

                                XElement b = Descendant(node, "b");
                                if (b != null)
                                {
                                    string bVal = (string)Attr(b, "val");
                                }

                                XElement sz = Descendant(node, "sz");
                                if (sz != null)
                                {
                                    int? szVal = (int?)Attr(sz, "val");
                                }

                                XElement szCs = Descendant(node, "szCs");
                                if (szCs != null)
                                {
                                    int? szCsVal = (int?)Attr(szCs, "val");
                                }
                                break;

                            case "r":
                                string rsidRPr = (string)Attr(node, "rsidRPr");
                                break;

                            case "sectPr":
                                string sectRsidR = (string)Attr(node, "rsidR");

                                var pgSz = node.Descendants()
                                    .Where(x => x.Name.LocalName == "pgSz")
                                    .Select(x => new
                                    {
                                        w = (int?)Attr(x, "w"),
                                        h = (int?)Attr(x, "h")
                                    })
                                    .FirstOrDefault();

                                var pgMar = node.Descendants()
                                    .Where(x => x.Name.LocalName == "pgMar")
                                    .Select(x => new
                                    {
                                        top = (int?)Attr(x, "top"),
                                        right = (int?)Attr(x, "right"),
                                        bottom = (int?)Attr(x, "bottom"),
                                        left = (int?)Attr(x, "left"),
                                        header = (int?)Attr(x, "header"),
                                        footer = (int?)Attr(x, "footer"),
                                        gutter = (int?)Attr(x, "gutter")
                                    })
                                    .FirstOrDefault();

                                var cols = node.Descendants()
                                    .Where(x => x.Name.LocalName == "cols")
                                    .Select(x => new
                                    {
                                        space = (int?)Attr(x, "space")
                                    })
                                    .FirstOrDefault();

                                var docGrid = node.Descendants()
                                    .Where(x => x.Name.LocalName == "docGrid")
                                    .Select(x => new
                                    {
                                        linePitch = (int?)Attr(x, "linePitch")
                                    })
                                    .FirstOrDefault();
                                break;

                            case "tr":
                                string trRsidR = (string)Attr(node, "rsidR");
                                string trParaId = (string)Attr(node, "paraId");
                                string trTextId = (string)Attr(node, "textId");
                                string tRrsidTr = (string)Attr(node, "rsidTr");
                                break;

                            default:
                                // Any other child of body is ignored.
                                break;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Returns the first attribute of <paramref name="element"/> whose local name
        /// (ignoring namespace) equals <paramref name="localName"/>, or <c>null</c>.
        /// </summary>
        private static XAttribute Attr(XElement element, string localName)
        {
            return element.Attributes().FirstOrDefault(a => a.Name.LocalName == localName);
        }

        /// <summary>
        /// Returns the first descendant of <paramref name="element"/> whose local name
        /// (ignoring namespace) equals <paramref name="localName"/>, or <c>null</c>.
        /// </summary>
        private static XElement Descendant(XElement element, string localName)
        {
            return element.Descendants().FirstOrDefault(e => e.Name.LocalName == localName);
        }
    }
}