我有这张用python编写的代码:
import urllib
from BeautifulSoup import BeautifulSoup
from yattag import Doc, indent
urls = []
sock1 = urllib.urlopen("http://www.tubtun.com/videos/")
htmlSource1 = sock1.read()
sock1.close()
soup1 = BeautifulSoup(htmlSource1)
f = open ('sitemap.xml','w');
for i in soup1.findAll('a'):
if (i.get('href')):
if (i["href"].find("http://www.tubtun.com/video/") == 0):
if(i["href"][0:1]=="u'"):
i["href"]=i["href"][2:]
sock = urllib.urlopen(i["href"])
htmlSource = sock.read()
sock.close()
soup = BeautifulSoup(htmlSource)
thumbnailUrl = soup.find('meta',{'itemprop':"thumbnailUrl"})
name = soup.find('meta',{'itemprop':"name"})
description = soup.find('meta',{'itemprop':"description"})
duration = soup.find('meta',{'itemprop':"duration"})
contentURL = soup.find('meta',{'itemprop':"contentURL"})
embedURL = soup.find('meta',{'itemprop':"embedURL"})
uploadDate = soup.find('meta',{'itemprop':"uploadDate"})
vif = soup.find('meta',{'property':"og:video"})
doc, tag, text = Doc().tagtext()
with tag('url'):
with tag('loc'):
text(i["href"])
with tag('video:video'):
with tag('video:title'):
text(name["content"])
with tag('video:description'):
text(description["content"])
with tag('video:thumbnail_loc'):
text(thumbnailUrl["content"])
with tag('video:player_loc'):
text(vif["content"])
with tag('video:publication_date'):
text(uploadDate["content"][0:10])
result = indent(
doc.getvalue(),
indentation = ' '*4,
newline = '\r\n'
)
f.write(result)
f.close()
该脚本为tubtun.com网站生成站点地图视频。
运行脚本时出现此错误:
unicodeencodeerror' ascii'编解码器不能编码字符
解决这个问题的任何解决方案?