我正在尝试将这个来自IESO的文件导入到Python中,并将其转换为Pandas数据框。我可以打印dtypes(所有列都是object类型),但是我无法正确访问或打印其中的数据。有人知道我哪里做错了吗?
这是我使用的代码,灵感来自我发现的在线资源:
import requests
# Gather XML Data: download the report at user_agent_url as raw bytes.
user_agent_url = 'http://reports.ieso.ca/public/OntarioZonalDemand/PUB_OntarioZonalDemand_20180824.xml'
# A timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() stops an HTTP error page from being parsed as the report.
response = requests.get(user_agent_url, timeout=30)
response.raise_for_status()
xml_data = response.content
# Parse XML Data
import xml.etree.ElementTree as ET
import pandas as pd
class XML2DataFrame:
    """Flatten an XML document into a pandas DataFrame.

    Each direct child of the document root becomes one row. The attributes
    and text of that child and of all of its descendants are merged into a
    single flat dictionary, which supplies the row's columns.
    """

    def __init__(self, xml_data):
        """Parse *xml_data* with ``ET.XML`` and keep the resulting root."""
        self.root = ET.XML(xml_data)

    def parse_root(self, root):
        """Return one flattened dictionary per direct child of *root*."""
        return [self.parse_element(child) for child in root]

    def parse_element(self, element, parsed=None):
        """Collect attributes and text of *element* and its descendants.

        Args:
            element: The XML element to flatten.
            parsed: Dictionary to accumulate into; a new one is created when
                omitted. Recursive calls share the caller's dictionary.

        Returns:
            The dictionary, mapping attribute names to attribute values and
            element tags to element text.

        Raises:
            ValueError: If an attribute name is already present in the
                dictionary collected so far.
        """
        if parsed is None:
            parsed = {}
        for key, value in element.attrib.items():
            if key in parsed:
                # xml.etree elements have no getroottree()/getpath() (those
                # are lxml APIs), so the offending element is identified by
                # its tag instead of an XPath.
                raise ValueError(
                    'duplicate attribute {0} at element {1}'.format(
                        key, element.tag
                    )
                )
            parsed[key] = value
        # Any non-empty text is kept (whitespace-only text included). Text is
        # stored under the tag, so a later element with the same tag
        # overwrites an earlier one.
        if element.text:
            parsed[element.tag] = element.text
        # Recurse so that descendants contribute to the same flat dictionary.
        for child in element:
            self.parse_element(child, parsed)
        return parsed

    def process_data(self):
        """Flatten the children of the stored root and return a DataFrame."""
        structure_data = self.parse_root(self.root)
        return pd.DataFrame(structure_data)
# Build the DataFrame from the downloaded XML.
xml2df = XML2DataFrame(xml_data)
xml_dataframe = xml2df.process_data()
# Debug aid, disabled: print(xml_dataframe.dtypes)
# Lift pandas' row/column display limits so the whole frame is printed.
display_options = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*display_options):
    print(xml_dataframe)