我有一个如下格式的 XML。该 XML 包含两个部分(job_type="REQUESTED" 和 job_type="RECOMMENDED");我只想解析 REQUESTED 块中的值。同样,在 Applicants 标签下有两种类型的申请人(type="PB" 和 type="CB")。
我想将结果解析为以下格式的csv
id , social_security_number (where type = "PB"), first_name(where type = "PB"), city(where type = "PB" and item_code="CURRENT"), state_code_id(where type = "PB" and item_code="CURRENT"), com(where item_code="PEMAIL" and type ="PB"), social_security_number (where type = "CB"), first_name(where type = "CB"), city(where type = "CB" and item_code="CURRENT"), state_code_id(where type = "CB" and item_code="CURRENT"), com(where item_code="PEMAIL" and type ="CB")
例如结果:
2407132 ,999999999, Thomas, Portland, MI, edison@gmail.com, 123456789, Mary, BarHarBor, MI, mary@gmail.com
<?xml version="1.0" encoding="utf-8"?>
<!-- Sample document: one REQUESTED and one RECOMMENDED JobApplication, each holding a PB and a CB applicant. -->
<JobApplications xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="2407132" bundle_id="2407132" version="1.0">
  <JobApplication job_type="REQUESTED" request_date="2014-08-02T12:26:00.0000000">
    <JobApplicationStates>
      <JobApplicationState type="USEDCL" payment_call_flag="False">
        <Applicants>
          <Applicant social_security_number="999999999" type="PB" date_of_birth="1972-10-01T00:00:00.0000000" first_name="Thomas" last_name="Edison">
            <Addresses>
              <Address city="Portland" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" />
              <Address item_code="PREVIOUS" />
            </Addresses>
            <Communications>
              <Communication item_code="PEMAIL" com="edison@gmail.com" contact_type="CU"/>
              <Communication item_code="HOME" com="(123)-456-7890" contact_type="CU"/>
              <Communication item_code="OTHER" contact_type="CU"/>
              <Communication item_code="WORK" com="(100)-200-3000" contact_type="CU"/>
            </Communications>
          </Applicant>
          <Applicant social_security_number="123456789" type="CB" date_of_birth="1976-10-01T00:00:00.0000000" first_name="Mary" last_name="Edison">
            <Addresses>
              <Address city="BarHarBor" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" />
              <Address item_code="PREVIOUS" />
            </Addresses>
            <Communications>
              <Communication item_code="PEMAIL" com="mary@gmail.com" contact_type="CU"/>
              <Communication item_code="HOME" com="(999)-456-7890" contact_type="CU"/>
              <Communication item_code="OTHER" contact_type="CU"/>
              <Communication item_code="WORK" com="(300)-200-3000" contact_type="CU"/>
            </Communications>
          </Applicant>
        </Applicants>
      </JobApplicationState>
    </JobApplicationStates>
  </JobApplication>
  <!-- Second JobApplication: same structure as above, but job_type="RECOMMENDED". -->
  <JobApplication job_type="RECOMMENDED" request_date="2014-08-02T12:26:00.0000000">
    <JobApplicationStates>
      <JobApplicationState type="USEDCL" payment_call_flag="False">
        <Applicants>
          <Applicant social_security_number="999999999" type="PB" date_of_birth="1972-10-01T00:00:00.0000000" first_name="Thomas" last_name="Edison">
            <Addresses>
              <Address city="Portland" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" />
              <Address item_code="PREVIOUS" />
            </Addresses>
            <Communications>
              <Communication item_code="PEMAIL" com="edison@gmail.com" contact_type="CU"/>
              <Communication item_code="HOME" com="(123)-456-7890" contact_type="CU"/>
              <Communication item_code="OTHER" contact_type="CU"/>
              <Communication item_code="WORK" com="(100)-200-3000" contact_type="CU"/>
            </Communications>
          </Applicant>
          <Applicant social_security_number="123456789" type="CB" date_of_birth="1976-10-01T00:00:00.0000000" first_name="Mary" last_name="Edison">
            <Addresses>
              <Address city="BarHarBor" state_code_id="MI" country_code="USA" postal_code="12345" item_code="CURRENT" street_number="6297" street="LAKE ARBOR" />
              <Address item_code="PREVIOUS" />
            </Addresses>
            <Communications>
              <Communication item_code="PEMAIL" com="mary@gmail.com" contact_type="CU"/>
              <Communication item_code="HOME" com="(999)-456-7890" contact_type="CU"/>
              <Communication item_code="OTHER" contact_type="CU"/>
              <Communication item_code="WORK" com="(300)-200-3000" contact_type="CU"/>
            </Communications>
          </Applicant>
        </Applicants>
      </JobApplicationState>
    </JobApplicationStates>
  </JobApplication>
关于如何使用 XDocument 将其解析为所需的格式,有什么建议吗?我有数百万个 XML 文件需要解析。
答案 0 :(得分:0)
通常可以将 XML 文件展平,以便更容易存入数据库并进行读取。就你的情况而言,我认为这是最好的方法,见下面的代码。既然解析完成后可以再对 DataTable 的结果进行过滤,我认为没有必要为了只解析某些项而使解析方法变得复杂。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication1
{
    /// <summary>
    /// Flattens a JobApplications XML file into a <see cref="DataTable"/>: one row per
    /// Applicant element, with the values of its parent JobApplication repeated on each row.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>
        /// Loads <see cref="FILENAME"/> and builds the flattened table from its root element.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);

            // The table is only built here; filter or export its rows as required.
            DataTable dt = BuildTable(doc.Root);
        }

        /// <summary>
        /// Creates the empty table with the twenty output columns.
        /// </summary>
        /// <returns>A new <see cref="DataTable"/> with columns but no rows.</returns>
        static DataTable CreateSchema()
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("Job Type", typeof(string));
            dt.Columns.Add("Request Date", typeof(DateTime));
            dt.Columns.Add("Job State Type", typeof(string));
            dt.Columns.Add("Payment Call Flag", typeof(Boolean));
            dt.Columns.Add("SSN", typeof(string));
            dt.Columns.Add("Applicant Type", typeof(string));
            dt.Columns.Add("DOB", typeof(DateTime));
            dt.Columns.Add("First Name", typeof(string));
            dt.Columns.Add("Last Name", typeof(string));
            dt.Columns.Add("City", typeof(string));
            dt.Columns.Add("State", typeof(string));
            dt.Columns.Add("Country", typeof(string));
            dt.Columns.Add("Postal Code", typeof(string));
            dt.Columns.Add("Street Number", typeof(string));
            dt.Columns.Add("Street", typeof(string));
            dt.Columns.Add("Email", typeof(string));
            dt.Columns.Add("Home Phone", typeof(string));
            dt.Columns.Add("Other", typeof(string));
            dt.Columns.Add("Work Phone", typeof(string));
            return dt;
        }

        /// <summary>
        /// Walks every JobApplication under <paramref name="jobApplications"/> and adds one
        /// row per Applicant. Missing optional elements or attributes leave the corresponding
        /// cells empty instead of aborting the whole document.
        /// </summary>
        /// <param name="jobApplications">The JobApplications root element.</param>
        /// <returns>The populated table.</returns>
        static DataTable BuildTable(XElement jobApplications)
        {
            DataTable dt = CreateSchema();

            // The id is treated as mandatory: the non-nullable cast throws if it is absent.
            int id = (int)jobApplications.Attribute("id");

            foreach (XElement jobApplication in jobApplications.Elements("JobApplication"))
            {
                string jobType = (string)jobApplication.Attribute("job_type");
                DateTime? requestDate = (DateTime?)jobApplication.Attribute("request_date");

                // Looked up once; FirstOrDefault() yields null when no state element exists.
                XElement state = jobApplication.Descendants("JobApplicationState").FirstOrDefault();
                string jobStateType = Attr(state, "type");
                bool? paymentCallFlag = state == null ? null : (bool?)state.Attribute("payment_call_flag");

                foreach (XElement applicant in jobApplication.Descendants("Applicant"))
                {
                    string socialSecurityNumber = (string)applicant.Attribute("social_security_number");
                    string applicantType = (string)applicant.Attribute("type");
                    DateTime? dateOfBirth = (DateTime?)applicant.Attribute("date_of_birth");
                    string firstName = (string)applicant.Attribute("first_name");
                    string lastName = (string)applicant.Attribute("last_name");

                    // Only the address flagged CURRENT is exported; it may be absent.
                    XElement address = applicant.Descendants("Address")
                        .Where(x => (string)x.Attribute("item_code") == "CURRENT")
                        .FirstOrDefault();

                    XElement communications = applicant.Descendants("Communications").FirstOrDefault();

                    // Null entries leave the cell at the column's default value.
                    dt.Rows.Add(new object[] {
                        id,
                        jobType, requestDate, jobStateType, paymentCallFlag,
                        socialSecurityNumber, applicantType, dateOfBirth, firstName, lastName,
                        Attr(address, "city"), Attr(address, "state_code_id"), Attr(address, "country_code"),
                        Attr(address, "postal_code"), Attr(address, "street_number"), Attr(address, "street"),
                        CommunicationValue(communications, "PEMAIL"),
                        CommunicationValue(communications, "HOME"),
                        CommunicationValue(communications, "OTHER"),
                        CommunicationValue(communications, "WORK")
                    });
                }
            }

            return dt;
        }

        /// <summary>
        /// Reads an attribute as a string, tolerating a missing element or attribute.
        /// </summary>
        /// <param name="element">The element to read from; may be null.</param>
        /// <param name="name">The attribute name.</param>
        /// <returns>The attribute value, or null when the element or attribute is absent.</returns>
        static string Attr(XElement element, string name)
        {
            return element == null ? null : (string)element.Attribute(name);
        }

        /// <summary>
        /// Returns the "com" attribute of the first child of <paramref name="communications"/>
        /// whose item_code equals <paramref name="itemCode"/>.
        /// </summary>
        /// <param name="communications">The Communications element; may be null.</param>
        /// <param name="itemCode">The item_code to match, e.g. "PEMAIL".</param>
        /// <returns>The value, or null when there is no match or no "com" attribute.</returns>
        static string CommunicationValue(XElement communications, string itemCode)
        {
            if (communications == null)
            {
                return null;
            }

            return communications.Elements()
                .Where(x => (string)x.Attribute("item_code") == itemCode)
                .Select(x => (string)x.Attribute("com"))
                .FirstOrDefault();
        }
    }
}
答案 1 :(得分:0)
使用开源库 Cinchoo ETL 应该可以帮到您:
只需几行代码即可将 XML 文件转换为 CSV。由于 XML 是结构化格式的文件,因此需要先将其展平才能生成 CSV。借助 XPath 和 Cinchoo ETL 库,您可以快速生成 CSV 文件。
下面的示例代码显示了如何解析xml并将其转换为csv
// Shared XPath fragments. These are compile-time constants, so each concatenation below
// folds into the same literal path that would otherwise be spelled out in full.
const string applicants = "/JobApplication[@job_type='REQUESTED']/JobApplicationStates/JobApplicationState/Applicants";
const string pb = applicants + "/Applicant[@type='PB']";
const string cb = applicants + "/Applicant[@type='CB']";
const string currentAddress = "/Addresses/Address[@item_code='CURRENT']";
const string pemail = "/Communications/Communication[@item_code='PEMAIL']/@com";

// One field per CSV column: the document id, then the PB values, then the CB values.
using (var reader = new ChoXmlReader("sample.xml").WithXPath("JobApplications")
    .WithField("ID", xPath: "@id")
    .WithField("PB_SSN", xPath: pb + "/@social_security_number")
    .WithField("PB_FIRST_NAME", xPath: pb + "/@first_name")
    .WithField("PB_CITY", xPath: pb + currentAddress + "/@city")
    .WithField("PB_STATE", xPath: pb + currentAddress + "/@state_code_id")
    .WithField("PB_PEMAIL", xPath: pb + pemail)
    .WithField("CB_SSN", xPath: cb + "/@social_security_number")
    .WithField("CB_FIRST_NAME", xPath: cb + "/@first_name")
    .WithField("CB_CITY", xPath: cb + currentAddress + "/@city")
    .WithField("CB_STATE", xPath: cb + currentAddress + "/@state_code_id")
    .WithField("CB_PEMAIL", xPath: cb + pemail)
    )
{
    // Hand everything the reader yields to the CSV writer.
    using (var csv = new ChoCSVWriter("sample.csv"))
    {
        csv.Write(reader);
    }
}
输出:
2407132,999999999,Thomas,Portland,MI,edison@gmail.com,123456789,Mary,BarHarBor,MI,mary@gmail.com
披露:我是这个库的作者。