我对这个脚本的问题是：我必须不断地调用 .encode('utf-8')，
这在我看来并不好。我一定是哪里做错了。
# -*- coding: utf-8 -*-
""" Simple rss to html converter """
__version__ = "0.0.1"
__author__ = "Ricky L Wilson"
import StringIO
from feedparser import parse as parsefeed
from bs4 import BeautifulSoup as bs
def entry2html(**kwargs):
    """Render one feedparser entry as a UTF-8 encoded HTML fragment.

    Expects ``title``, ``link`` and ``description`` keyword arguments
    (unicode strings, as feedparser provides them).

    Fix: instead of byte-encoding each field *before* formatting (which
    forced the original byte template through an implicit ASCII decode
    and raised UnicodeEncodeError on characters like u'\\xa3'), keep
    everything unicode and encode exactly once, at the end.
    """
    # u-prefixed template keeps .format() entirely in unicode.
    template = u"""
<h2 class='title'>{title}</h2>
<a class='link' href='{link}'>{title}</a>
<span class='description'>{description}</span>
"""
    return template.format(
        title=kwargs['title'],
        link=kwargs['link'],
        description=kwargs['description'],
    ).encode('utf-8')
def convert_feed(**kwargs):
    """Fetch the feed at kwargs['url'] and return its entries rendered
    as prettified HTML (one fragment per entry, newline-separated)."""
    buf = StringIO.StringIO("")
    for item in parsefeed(kwargs['url']).entries:
        fragment = entry2html(title=item['title'],
                              link=item['link'],
                              description=item['description'])
        # Equivalent to the original ``print >> buf, fragment``.
        buf.write(fragment)
        buf.write("\n")
    return bs(buf.getvalue(), 'lxml').prettify()
def save_file(url, fname):
    """Render the feed at *url* and write the UTF-8 HTML to *fname*.

    Returns None (the trailing print therefore shows ``None``)."""
    with open(fname, 'w') as handle:
        html = convert_feed(url=url)
        handle.write(html.encode('utf-8'))


print(save_file('http://stackoverflow.com/feeds', 'index.html'))
附注：如果我删除其中任何一个 .encode('utf-8') 调用，脚本就会出错。
我也尝试过把 # -*- coding: utf-8 -*-
放在文件顶部，但并没有解决问题。我收到了如下错误。
C:\Python27>python rss2html.py
File "rss2html.py", line 40
save_file('http://stackoverflow.com/feeds', 'index.html')
^
SyntaxError: invalid syntax
C:\Python27>python rss2html.py
Traceback (most recent call last):
File "rss2html.py", line 40, in <module>
save_file('http://stackoverflow.com/feeds', 'index.html')
File "rss2html.py", line 38, in save_file
file_object.write(convert_feed(url=url))
File "rss2html.py", line 32, in convert_feed
print >> out, entry2html(title=title, link=link, description=description)
File "rss2html.py", line 22, in entry2html
return template.format(title=title, link=link, description=description)
UnicodeEncodeError: 'ascii' codec can't encode character u'\xa3' in position 172: ordinal not in range(128)
答案 0 :(得分:1)
你的 kwargs 都是 unicode 字符串。你应该用 u
前缀把模板定义为 unicode 字符串，然后只在最后编码一次。
# NOTE(review): fragment of entry2html from the answer above — the ``u``
# prefix keeps the template (and .format's result) unicode, so UTF-8
# encoding happens exactly once, on the final return value.
template = u"""
<h2 class='title'>{title}</h2>
<a class='link' href='{link}'>{title}</a>
<span class='description'>{description}</span>
"""
return template.format(title=title, link=link, description=description).encode('utf-8')
答案 1 :(得分:0)
我自己想明白了，请大家看看这个思路如何。
这种方法是否比到处调用 .encode('utf-8') 更好？还是说这只是一种权宜之计（hack），
仍然存在更 pythonic 的方式来消除这些 .encode('utf-8') 调用？
以下是新脚本
# -*- coding: utf-8 -*-
""" Simple rss to html converter """
__version__ = "0.0.1"
__author__ = "Ricky L Wilson"
import StringIO
from feedparser import parse as parsefeed
from bs4 import BeautifulSoup as bs
def flatten_unicode_keys(d):
    """Convert unicode keys of *d* to byte-string keys, in place, so the
    dict can be passed as **kwargs (Python 2 requires ``str`` keyword
    names).

    Fix: the original deleted keys from ``d`` while iterating ``d``
    directly, which mutates a dict during iteration (undefined order of
    visits / RuntimeError). Iterate over a snapshot of the keys instead.

    NOTE(review): ``str(key)`` raises UnicodeEncodeError for non-ASCII
    keys; feedparser's entry keys are ASCII, so that is fine here.
    """
    for key in list(d.keys()):
        if isinstance(key, unicode):
            d[str(key)] = d.pop(key)
def entry2html(**kwargs):
    """Render one feedparser entry (title, link, description) as an
    HTML fragment."""
    flatten_unicode_keys(kwargs)
    template = """
<h2 class='title'>{title}</h2>
<a class='link' href='{link}'>{title}</a>
<span class='description'>{description}</span>
"""
    return template.format(
        title=kwargs['title'],
        link=kwargs['link'],
        description=kwargs['description'],
    )
def convert_feed(**kwargs):
    """Fetch the feed at kwargs['url'] and return all entries rendered
    as prettified HTML."""
    buf = StringIO.StringIO("")
    for item in parsefeed(kwargs['url']).entries:
        # Same effect as ``print >> buf, ...`` in the original.
        buf.write(entry2html(title=item['title'],
                             link=item['link'],
                             description=item['description']))
        buf.write("\n")
    return bs(buf.getvalue(), 'lxml').prettify()
def save_file(url, fname):
    """Render the feed at *url* and save the HTML to *fname*."""
    with open(fname, 'w') as handle:
        handle.write(convert_feed(url=url))


save_file('http://stackoverflow.com/feeds', 'index.html')
答案 2 :(得分:0)
下面是脚本的最新版本：不再需要 flatten_unicode_keys(d)，
不再使用 StringIO，
也不再有显式的 for 循环。
我认为它比之前的版本更 pythonic，而且也更快。
# -*- coding: utf-8 -*-
"""Simple RSS to HTML converter."""
__version__ = "0.0.2"
__author__ = "Ricky L Wilson"
from bs4 import BeautifulSoup
from feedparser import parse as parse_feed
# Unicode (u-prefixed) HTML template for a single feed entry. The
# placeholder names match attributes feedparser puts on each entry
# (``title``, ``link``, ``summary``), so it can be filled straight from
# the entry via TEMPLATE.format(**entry).
TEMPLATE = u"""
<h2 class='title'>{title}</h2>
<a class='link' href='{link}'>{title}</a>
<span class='description'>{summary}</span>
"""
def entry_to_html(**kwargs):
    """Render a single feedparser entry as a UTF-8 encoded HTML
    fragment; extra keyword arguments beyond the template's fields are
    ignored by ``format``."""
    fragment = TEMPLATE.format(**kwargs)
    return fragment.encode('utf-8')
def convert_feed(url):
    """Fetch *url*, render every entry, and return prettified HTML."""
    joined = "\n".join(entry_to_html(**entry)
                       for entry in parse_feed(url).entries)
    return BeautifulSoup(joined, 'lxml').prettify()
def save_file(url, filename):
    """Render the feed at *url* and write the UTF-8 HTML to *filename*."""
    with open(filename, 'w') as handle:
        handle.write(convert_feed(url).encode('utf-8'))
if __name__ == '__main__':
    # Convert the feed, save it, then echo the result to stdout.
    # print(x) with a single parenthesised argument behaves identically
    # under Python 2's print statement.
    save_file('http://stackoverflow.com/feeds', 'index.html')
    with open('index.html') as fobj:
        print(fobj.read())