I'm getting the error message shown below, and the same arrangement of elements works fine in my other programs. I've tried a number of fixes, but none of them seem to work.
    html = browse.open(url).read()
  File "build\bdist.win32\egg\mechanize\_mechanize.py", line 203, in open
  File "build\bdist.win32\egg\mechanize\_mechanize.py", line 230, in _mech_open
  File "build\bdist.win32\egg\mechanize\_opener.py", line 188, in open
  File "build\bdist.win32\egg\mechanize\_urllib2_fork.py", line 1062, in do_request_
ValueError: too many values to unpack
The code is below.... Please advise.
import mechanize
from BeautifulSoup import BeautifulSoup
import readability
from readability.readability import Document
import re
import urlparse
import urllib
import json
import gzip
"""Functions"""
def translateString(homeLanguage, targetLanguage, transText):
    post_url = "http://translate.google.com/translate_a/t"
    br = mechanize.Browser()
    br.set_handle_robots(False)
    br.addheaders = [("User-agent", "Firefox")]
    # these are the parameters you have got from the aforementioned tools
    parameters = {'client': 't',
                  'text': transText,
                  'h1': homeLanguage,
                  's1': homeLanguage,
                  't1': targetLanguage,
                  'ie': 'UTF-8',
                  'oe': 'UTF-8',
                  'multires': '1',
                  'otf': '2',
                  'pc': '0',
                  'ssel': '0',
                  'tsel': '0'
                  }
    # Encode the parameters
    data = urllib.urlencode(parameters)
    # submit the form (POST request). you get the post_url and the request type (POST/GET)
    # the same way with the parameters
    trans_array = br.open(post_url, data).read().decode('UTF-8')
    # Submit the form (GET request)
    trans_string = ""
    sections = trans_array.split("]]")
    secarray = sections[0].replace("[[[", "").replace("],[", "").replace('""', "").split('"')
    co = -1
    for thing in secarray:
        if co % 6 == 0:
            trans_string += thing
        co += 1
    print trans_string
def getReadableArticle(url):
    browse = mechanize.Browser()
    browse.set_handle_robots(False)
    browse.addheaders = [('User-agent'),('Firefox')]
    html = browse.open(url).read()
    readable_article = Document(html).summary()
    readable_title = Document(html).short_title()
    soup = BeautifulSoup(readable_article)
    final_article = soup.text
    links = soup.findAll('img', src=True)
    title_article = []
    title_article.append(final_article)
    title_article.append(readable_title)
    return title_article
def main():
    url = "http://www.nytimes.com/2015/01/13/sports/ncaafootball/ohio-state-upsets-oregon-and-wins-national-championship.html?hp&action=click&pgtype=Homepage&module=second-column-region&region=top-news&WT.nav=top-news"
    article_attrs = getReadableArticle(url)
    readable_article = article_attrs[0]
    readable_title = article_attrs[1]
    chinese = translateString("en", "zh-CN", str(readable_article))
    english = translateString("zh-CN", "en", chinese)
    print readable_article[0:50]
    print chinese[0:50]
    print english[0:50]

if __name__ == '__main__': main()
Answer 0 (score: 0)
According to your error message, browse.open(url).read() ends up producing more values than the single name you give to hold them, so you get an error like the following:
>>> a = [1,2,3,4]
>>> b,c = a
Traceback (most recent call last):
File "<string>", line 1, in <module>
ValueError: too many values to unpack (expected 2)
So print out the value of browse.open(url).read() and make sure the number of names before the '=' matches the number of values being returned; then it will be fine.
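
For what it's worth, the frame the traceback stops in (do_request_ in mechanize's _urllib2_fork.py) unpacks each entry of the browser's addheaders list into a (name, value) pair, and the question sets browse.addheaders = [('User-agent'),('Firefox')], which is a list of two plain strings rather than a single 2-tuple (parentheses without a comma do not make a tuple). Here is a minimal sketch of that unpacking, offered as a guess at the cause rather than a confirmed diagnosis:

def apply_headers(addheaders):
    # Mimics the "for name, value in ... addheaders" style loop the traceback
    # points at; any entry that is not a 2-tuple fails to unpack.
    for name, value in addheaders:
        print("%s: %s" % (name, value))

apply_headers([('User-agent', 'Firefox')])        # one (name, value) tuple -> fine
try:
    apply_headers([('User-agent'), ('Firefox')])  # two bare strings
except ValueError as err:
    print(err)  # "too many values to unpack" -- the same error as in the question

If that is indeed the problem, changing the headers line to a single (name, value) tuple, as the translateString function already does, should make the open() call go through.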
Answer 1 (score: 0)
browse.open(url).read() did the trick. Thanks @Zoosuck.