我得到一个Typecast - 错误,当我尝试转换unicode-strings,从网站解析。 以下是相关的代码:
# special IPython command to prepare the notebook for matplotlib
%matplotlib inline
from fnmatch import fnmatch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from pattern import web
# set some nicer defaults for matplotlib
from matplotlib import rcParams
#these colors come from colorbrewer2.org. Each is an RGB triplet
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
(0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
(0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
(0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
(0.4, 0.6509803921568628, 0.11764705882352941),
(0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
(0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
(0.4, 0.4, 0.4)]
rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150
rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.grid'] = True
rcParams['axes.facecolor'] = '#eeeeee'
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'none'
def get_poll_xml(id):
poll = requests.get("http://charts.realclearpolitics.com/charts/"+str(id)+".xml").text
return poll
import re
def _strip(s):
return re.sub(r'[\W_]+', '', s)
def plot_colors(xml):
dom = web.Element(xml)
result = {}
for graph in dom.by_tag('graph'):
title = _strip(graph.attributes['title'])
result[title] = graph.attributes['color']
return result
def rcp_poll_data(xml):
dates = []
graph1 = []
graph2 = []
data = web.Element(xml)
for item in data.by_tag('series')[0].by_tag('value'):
dates.append(item.content)
for item in data.by_tag('graph')[0].by_tag('value'):
graph1.append(float(unicode(item.content)))
for item in data.by_tag('graph')[1].by_tag('value'):
graph2.append(float(unicode(item.content)))
return pd.DataFrame({'Date': pd.to_datetime(dates), data.by_tag('graph')[0].title: graph1, data.by_tag('graph')[1].title: graph2})
执行此代码时:
print rcp_poll_data(get_poll_xml(1044))
然后我收到以下错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-76ebd15c4a1d> in <module>()
----> 1 print rcp_poll_data(get_poll_xml(1044))
<ipython-input-7-02e6590229d7> in rcp_poll_data(xml)
53 dates.append(item.content)
54 for item in data.by_tag('graph')[0].by_tag('value'):
---> 55 graph1.append(float(unicode(item.content)))
56 for item in data.by_tag('graph')[1].by_tag('value'):
57 graph2.append(float(unicode(item.content)))
ValueError: could not convert string to float:
我已尝试直接转换为float(item.count)
或通过字符串float(string(item.count))
,但我总是遇到类型错误...
如果有人可以提供帮助或者可以向我发送主题,我可以在那里找到解决方案,我会很高兴的。我不知道在哪里寻找错误。
THX
答案 0 :(得分:0)
我检查了您在代码中检索到的the xml file,发现以value
开头的xid="1824"
元素没有内容,因此当您尝试转换时,您会得到一个空字符串,会引发错误它是浮动的。解决方案是在转换为float之前检查item.content
的内容是否为空。
for item in data.by_tag('graph')[0].by_tag('value'):
if item.content:
graph1.append(float(unicode(item.content)))
在上面的for循环之后,你必须进行相同的修正。