我看到类似的问题,但我认为我的有点不同。试图解决方案所说的几乎是什么。 我尝试从此页面创建rss Feed" http://www.taraf.com.tr/author/hayko-bagdat" 我得到了run.py:
import tar
import PyRSS2Gen
import locale
from datetime import datetime
locale.setlocale(locale.LC_ALL,'')
rss = PyRSS2Gen.RSS2(title='ff',link='',description='',lastBuildDate='',items = [])
sonit=[]
sonit+=tar.parse('http://www.taraf.com.tr/author/hayko-bagdat')
print "************************"
for k in sonit:
print k.zaman
rss.items.append(PyRSS2Gen.RSSItem(
title = k.baslik,
link = k.link,
description = k.aciklama,
guid = PyRSS2Gen.Guid(k.link),
pubDate = datetime.strptime(str(k.time),u'%d %B %Y %H:%M'),))
rss.write_xml(open("pyrss2gen.xml", "w"))
和tar.py
from bs4 import BeautifulSoup
import urllib2
import codecs
from datetime import datetime
import locale
import lxml.html
from BeautifulSoup import UnicodeDammit
import PyRSS2Gen
locale.setlocale(locale.LC_ALL, '')
import requests
import sys
class yenitem:
baslik= ""
link=""
aciklama=""
time=""
def parse(url):
request=urllib2.Request(url)
request.add_header('Accept-Encoding','utf-8')
response=urllib2.urlopen(request)
soup = BeautifulSoup(response.read().decode('utf-8', 'ignore'))
items=[]
for link in soup.find_all('article',{'class':'item-list'}):
item=yenitem()
item.baslik=link.find_all('h2')[0].get_text()
item.link= link.find_all('a')[0].get('href')
item.aciklama= link.find_all('div')[1].get_text()
item.time= link.find_all('span')[0].get_text()
print item.time
items.append(item)
return items
这很奇怪,因为它会打印" time"首先正确使用项目,但在向pubDate添加datetime时会出现问题。这是控制台输出:
24 Ocak 2015 00:00
21 Ocak 2015 00:00
17 Ocak 2015 00:00
14 Ocak 2015 00:00
10 Ocak 2015 00:00
07 Ocak 2015 00:00
03 Ocak 2015 00:00
31 Aralık 2014 00:00
27 Aralık 2014 00:00
24 Aralık 2014 00:00
************************
24 Ocak 2015 00:00
21 Ocak 2015 00:00
17 Ocak 2015 00:00
14 Ocak 2015 00:00
10 Ocak 2015 00:00
07 Ocak 2015 00:00
03 Ocak 2015 00:00
31 Aralık 2014 00:00
Traceback (most recent call last):
File "C:\Users\bigM\Desktop\yeni\runk.py", line 18, in <module>
pubDate = datetime.strptime(str(k.time),u'%d %B %Y %H:%M'),))
UnicodeEncodeError: 'ascii' codec can't encode character u'\u0131' in position 7: ordinal not in range(128)