我从数据库(MSSQL2014)生成XML输出,现在想将数据和XSD架构信息一起用于R数据框。
数据来源:SELECT TOP (1) *
-- One person row joined to its e-mail address and phone records.
FROM person.person AS p
    INNER JOIN person.EmailAddress AS ea
        ON ea.businessentityID = p.businessEntityID
    INNER JOIN person.PersonPhone AS pphone
        ON pphone.businessentityID = p.businessEntityID
-- Emit nested elements (p > ea > pphone) preceded by an inline XSD schema
-- whose target namespace is 'person'.
FOR XML AUTO, ELEMENTS, XMLSCHEMA('person');
执行查询后,我尝试用以下R代码导入结果:
# Questioner's import attempt (reported in this post as not working):
# parse the exported XML file and build a data frame from node attributes.
library(XML)
# gsub() removes every space from the path string (this literal contains none,
# so the path is unchanged). The outer parentheses print the assigned value.
# NOTE(review): with asText = TRUE, xmlParse() is documented to treat its first
# argument as XML content rather than a file name, so the path string itself
# would be parsed here -- confirm against the XML package documentation.
(xml_data <- xmlParse(gsub(" ", "", "C:\\dissertation\\smta\\indata\\01_Source_Query.XML", fixed = TRUE), asText = TRUE))
# NOTE(review): `xml` is not defined anywhere in the code shown; the parsed
# document was assigned to `xml_data` on the previous line.
# NOTE(review): the sample file in this post has no element named "person"
# ("person" only appears as the namespace; the row element is <p>), and its
# values are stored in child elements, not attributes -- so this XPath plus
# xmlAttrs presumably yields nothing useful even once `doc` is corrected.
xml_attrib <- xpathSApply(doc=xml, path="//person", xmlAttrs)
# Transpose the xpathSApply() result and wrap it in a data frame.
df2 <- data.frame(t(xml_attrib))
# Print the resulting data frame.
df2
以下是查询生成的文件。我试过用上面的R代码导入,但没有成功。有没有人能给些指导或提示,为我指明正确的方向?
生成的文件(XSD架构和数据):
<xsd:schema targetNamespace="person" xmlns:schema="person" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:sqltypes="http://schemas.microsoft.com/sqlserver/2004/sqltypes" elementFormDefault="qualified">
<xsd:import namespace="http://schemas.microsoft.com/sqlserver/2004/sqltypes" schemaLocation="http://schemas.microsoft.com/sqlserver/2004/sqltypes/sqltypes.xsd" />
<xsd:import namespace="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/ContactInfo" />
<xsd:import namespace="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/ContactRecord" />
<xsd:import namespace="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/ContactTypes" />
<xsd:import namespace="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/IndividualSurvey" />
<xsd:element name="p">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="BusinessEntityID" type="sqltypes:int" />
<xsd:element name="PersonType">
<xsd:simpleType>
<xsd:restriction base="sqltypes:nchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="2" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="NameStyle" type="sqltypes:bit" sqltypes:sqlTypeAlias="[AdventureWorks2014].[dbo].[NameStyle]" />
<xsd:element name="Title" minOccurs="0">
<xsd:simpleType>
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="8" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="FirstName">
<xsd:simpleType sqltypes:sqlTypeAlias="[AdventureWorks2014].[dbo].[Name]">
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="50" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="MiddleName" minOccurs="0">
<xsd:simpleType sqltypes:sqlTypeAlias="[AdventureWorks2014].[dbo].[Name]">
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="50" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="LastName">
<xsd:simpleType sqltypes:sqlTypeAlias="[AdventureWorks2014].[dbo].[Name]">
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="50" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="Suffix" minOccurs="0">
<xsd:simpleType>
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="10" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="EmailPromotion" type="sqltypes:int" />
<xsd:element name="AdditionalContactInfo" minOccurs="0">
<xsd:complexType sqltypes:xmlSchemaCollection="[AdventureWorks2014].[Person].[AdditionalContactInfoSchemaCollection]">
<xsd:complexContent>
<xsd:restriction base="sqltypes:xml">
<xsd:sequence>
<xsd:any processContents="strict" minOccurs="0" maxOccurs="unbounded" namespace="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/ContactInfo http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/ContactRecord http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/ContactTypes"
/></xsd:sequence>
</xsd:restriction>
</xsd:complexContent>
</xsd:complexType>
</xsd:element>
<xsd:element name="Demographics" minOccurs="0">
<xsd:complexType sqltypes:xmlSchemaCollection="[AdventureWorks2014].[Person].[IndividualSurveySchemaCollection]">
<xsd:complexContent>
<xsd:restriction base="sqltypes:xml">
<xsd:sequence>
<xsd:any processContents="strict" minOccurs="0" maxOccurs="unbounded" namespace="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/IndividualSurvey" /></xsd:sequence>
</xsd:restriction>
</xsd:complexContent>
</xsd:complexType>
</xsd:element>
<xsd:element name="rowguid" type="sqltypes:uniqueidentifier" />
<xsd:element name="ModifiedDate" type="sqltypes:datetime" />
<xsd:element ref="schema:ea" minOccurs="0" maxOccurs="unbounded" /></xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="ea">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="BusinessEntityID" type="sqltypes:int" />
<xsd:element name="EmailAddressID" type="sqltypes:int" />
<xsd:element name="EmailAddress" minOccurs="0">
<xsd:simpleType>
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="50" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="rowguid" type="sqltypes:uniqueidentifier" />
<xsd:element name="ModifiedDate" type="sqltypes:datetime" />
<xsd:element ref="schema:pphone" minOccurs="0" maxOccurs="unbounded" /></xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="pphone">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="BusinessEntityID" type="sqltypes:int" />
<xsd:element name="PhoneNumber">
<xsd:simpleType sqltypes:sqlTypeAlias="[AdventureWorks2014].[dbo].[Phone]">
<xsd:restriction base="sqltypes:nvarchar" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
<xsd:maxLength value="25" /></xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="PhoneNumberTypeID" type="sqltypes:int" />
<xsd:element name="ModifiedDate" type="sqltypes:datetime" /></xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<p xmlns="person">
<BusinessEntityID>1</BusinessEntityID>
<PersonType>EM</PersonType>
<NameStyle>0</NameStyle>
<FirstName>Ken</FirstName>
<MiddleName>J</MiddleName>
<LastName>Sánchez</LastName>
<EmailPromotion>0</EmailPromotion>
<Demographics>
<IndividualSurvey xmlns="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/IndividualSurvey">
<TotalPurchaseYTD>0</TotalPurchaseYTD>
</IndividualSurvey>
</Demographics>
<rowguid>92C4279F-1207-48A3-8448-4636514EB7E2</rowguid>
<ModifiedDate>2009-01-07T00:00:00</ModifiedDate>
<ea>
<BusinessEntityID>1</BusinessEntityID>
<EmailAddressID>1</EmailAddressID>
<EmailAddress>ken0@adventure-works.com</EmailAddress>
<rowguid>8A1901E4-671B-431A-871C-EADB2942E9EE</rowguid>
<ModifiedDate>2009-01-07T00:00:00</ModifiedDate>
<pphone>
<BusinessEntityID>1</BusinessEntityID>
<PhoneNumber>697-555-0142</PhoneNumber>
<PhoneNumberTypeID>1</PhoneNumberTypeID>
<ModifiedDate>2009-01-07T00:00:00</ModifiedDate>
</pphone>
</ea>
</p>
{{1}}
答案 0 :(得分:0)
我更喜欢用 rvest / xml2 包来解析文件。
# Extract every field of the person records into a two-column data frame
# (element name, element text).
library(xml2)
library(rvest)

# Parse the exported file as HTML.
source_path <- "C:\\dissertation\\smta\\indata\\01_Source_Query.XML"
page <- read_html(source_path)

# Select the <p> nodes, which hold the person records.
persons <- html_nodes(page, xpath = "//p")

# Look up all descendant elements once, then read names and text from them.
descendants <- xml_find_all(persons, ".//*")
fieldnames <- xml_name(descendants)
fields <- xml_text(descendants)

# Columns are named after the two vectors: `fieldnames` and `fields`.
df <- data.frame(fieldnames, fields)
该文件的结构看起来类似HTML。上面的代码读取数据文件,找到表示人员的 <p> 标记,提取字段名称和值,并将它们放入数据框中。
如果文件中有多个人,那么定义字段名称和值的那两行代码需要向量化(很可能要用 sapply)。还需要做一些清理,以删除最终数据框中多余的行。祝你好运。