将电力需求XML导入Python

时间:2018-10-22 18:37:19

标签: xml python-3.x pandas

我正在尝试将IESO提供的这个文件导入Python,并将其转换为Pandas数据框。我可以打印出dtypes(所有列都是object类型),但无法正确访问或打印其中的数据。有人知道我哪里做错了吗?

这是我使用的代码,灵感来自我发现的在线资源:

import requests

# Gather XML Data
# Download the raw XML report. A timeout keeps the script from hanging forever
# on an unresponsive server, and raise_for_status() turns an HTTP error
# (404, 500, ...) into an exception instead of passing an error page on to the
# XML parser.
user_agent_url = 'http://reports.ieso.ca/public/OntarioZonalDemand/PUB_OntarioZonalDemand_20180824.xml'
response = requests.get(user_agent_url, timeout=30)
response.raise_for_status()
xml_data = response.content

# Parse XML Data
import xml.etree.ElementTree as ET 
import pandas as pd 

class XML2DataFrame:
    """Flatten the children of an XML root into rows of a pandas DataFrame.

    Every direct child of the root becomes one row. The attributes and the
    text of that child and of all of its descendants are merged into a single
    flat dictionary of strings, which pandas then turns into columns.

    NOTE: because the result is flat, descendants that share a tag overwrite
    each other's text (the last one wins).
    """

    def __init__(self, xml_data):
        """Parse *xml_data* (an XML document as ``str`` or ``bytes``)."""
        self.root = ET.XML(xml_data)

    def parse_root(self, root):
        """Return a list with one flattened dictionary per child of *root*."""
        return [self.parse_element(child) for child in root]

    def parse_element(self, element, parsed=None):
        """Collect attributes and text of *element* and its descendants.

        Attributes are stored as ``{attribute_name: value}`` and text as
        ``{tag: text}``, all in the same dictionary.

        Args:
            element: The XML element to flatten.
            parsed: Dictionary to add to; a new one is created when omitted.

        Returns:
            The dictionary holding everything collected so far.

        Raises:
            ValueError: If an attribute name is already present in *parsed*.
        """
        if parsed is None:
            parsed = {}

        for key, value in element.attrib.items():
            if key in parsed:
                # xml.etree elements have no getroottree()/getpath() (that is
                # lxml API), so identify the offending element by its tag.
                raise ValueError(
                    'duplicate attribute {0} at element {1}'.format(key, element.tag)
                )
            parsed[key] = value

        # Container elements usually carry only indentation whitespace as
        # text; skip that so it does not show up as a meaningless column.
        text = element.text
        if text and text.strip():
            parsed[element.tag] = text

        # Recurse so that descendants contribute to the same flat dictionary.
        for child in element:
            self.parse_element(child, parsed)
        return parsed

    def process_data(self):
        """Flatten the stored root and return the rows as a DataFrame."""
        structure_data = self.parse_root(self.root)
        return pd.DataFrame(structure_data)

# Build the converter from the downloaded payload and materialise the frame.
xml2df = XML2DataFrame(xml_data)
xml_dataframe = xml2df.process_data()

# To inspect the column types instead, print xml_dataframe.dtypes.
# Lift pandas' row/column display limits only for the duration of this print.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(xml_dataframe)

0 个答案:

没有答案